#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/* Kernels implemented in the sequential BAIJ code and MPIBAIJ helpers defined elsewhere */
extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
/*
   MatGetRowMaxAbs_MPIBAIJ - For each locally owned row, finds the entry of
   maximum absolute value by taking the larger of the results from the
   diagonal block (a->A) and the off-diagonal block (a->B).  v receives the
   values; idx (may be NULL) receives the global column index of each winner.
*/
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  /* candidates from the diagonal block; its indices are local, shift to global */
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* candidates from the off-diagonal block, computed into a temporary sequential vector */
  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  /* keep the larger candidate; garray maps B's compacted block columns to global block columns */
  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);     /* PetscFree(NULL) is a no-op when idx was not requested */
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
/* Saves the numerical values of both local blocks (delegates to the sequential implementations) */
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
/* Restores the numerical values of both local blocks previously saved with MatStoreValues() */
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* hash-table variant: key = global block column + 1, value = i*bs + 1;
     the +1 offsets let 0 mean "no entry" */
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* dense-array variant: one slot per global block column (not scalable in memory) */
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

/*
   Inserts/adds one scalar (row,col,value) into the diagonal block A.
   NOTE: relies on locals declared by the caller (rp,ap,aa,ai,aj,aimax,ailen,
   bs,bs2,low,high,t,_i,ii,N,brow,bcol,ridx,cidx,bap,a,A,ierr).
*/
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    /* narrow the search window with bisection, then scan linearly */ \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
}

/*
   Same as MatSetValues_SeqBAIJ_A_Private() but targets the off-diagonal block B
   (uses the caller's bj,bi,ba,bimax,bilen,b instead of the A-block locals).
*/
#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs; \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ; \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ - Inserts/adds scalar-indexed values.  Locally owned
   rows go directly into the diagonal (A) or off-diagonal (B) block via the
   macros above; rows owned by other processes are stashed for communication
   during assembly.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  PetscBool
roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;            /* negative rows are silently ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {   /* row is owned by this process */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {     /* diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {                                                /* off-diagonal block */
          if (mat->was_assembled) {
            /* translate the global column to B's compacted local numbering */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* column not present in B: fall back to global numbering via disassembly */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {                              /* off-process row: stash for assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
/*
   MatSetValuesBlocked_MPIBAIJ - Inserts/adds bs x bs blocks of values using
   block (not scalar) indices; local blocks are forwarded to the sequential
   blocked kernels, off-process blocks are stashed in the block stash.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij   = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the per-matrix scratch block used for copying one bs x bs block */
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows/columns of one block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {  /* block row owned by this process */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          /* gather the bs x bs block into contiguous scratch storage */
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {       /* diagonal block */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {                                       /* off-diagonal block */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* block column not present in B: disassemble and use global numbering */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {                              /* off-process block row: stash */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Multiplicative (golden-ratio) hash used by the hash-table MatSetValues variants */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - Hash-table variant of MatSetValues_MPIBAIJ:
   locates existing blocks through baij->ht/baij->hd (built by
   MatCreateHashTable_MPIBAIJ_Private()); it cannot create new nonzeros.
   Off-process rows are stashed as in the regular path.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v)
PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up the owning block in the hash table; key encodes (block row, block col) */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        /* linear probe forward from h1, wrapping once; failure means the block was never allocated */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Hash-table variant of
   MatSetValuesBlocked_MPIBAIJ: block indices are hashed directly, and whole
   bs x bs blocks are written at the address stored in baij->hd.  Cannot
   create new nonzeros; off-process block rows go to the block stash.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves scalar-indexed entries; only locally owned
   rows are supported.  Columns outside both local blocks yield 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row:
%D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {   /* diagonal block */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {                                          /* off-diagonal block via colmap */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* colmap miss (or stale entry) means the column is structurally zero here */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
/*
   MatNorm_MPIBAIJ - Computes Frobenius, 1- (max column sum of absolute values)
   or infinity- (max row sum of absolute values) norm; local contributions from
   both blocks are combined with a collective MPI_Allreduce.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: the diagonal block is the whole matrix */
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);  /* already built */

  /* table size = factor * (number of stored blocks); factor > 1 keeps the load below 1 */
  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if
(!HT[(h1+k)%ht_size]) {
          /* empty slot found via linear probing: record key and block address */
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
/*
   MatAssemblyBegin_MPIBAIJ - Starts communication of the stashed off-process
   entries (scalar and block stashes); collective because of the insert-mode
   consistency check.
*/
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Drains both stashes into the local blocks,
   assembles A and B, handles disassembly agreement across processes, sets up
   the scatter for MatMult on first final assembly, and optionally builds the
   hash table used by the _HT MatSetValues variants.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.
Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* first final assembly: build the scatter context used by MatMult etc. */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if requested, switch subsequent MatSetValues calls to the hash-table fast path */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* invalidate any cached row returned by MatGetRow */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/* Viewer for ASCII/draw/socket formats (definition continues past this chunk) */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr =
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 950 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 951 if (iascii) { 952 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 953 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 954 MatInfo info; 955 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 956 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 957 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 958 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 959 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 960 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 961 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 962 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 963 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 964 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 965 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 966 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 967 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } else if (format == PETSC_VIEWER_ASCII_INFO) { 970 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 971 PetscFunctionReturn(0); 972 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 973 PetscFunctionReturn(0); 974 } 975 } 976 977 if (isdraw) { 978 PetscDraw draw; 979 PetscBool isnull; 980 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 981 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 982 } 983 
984 if (size == 1) { 985 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 986 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 987 } else { 988 /* assemble the entire matrix onto first processor. */ 989 Mat A; 990 Mat_SeqBAIJ *Aloc; 991 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 992 MatScalar *a; 993 994 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 995 /* Perhaps this should be the type of mat? */ 996 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 997 if (!rank) { 998 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 999 } else { 1000 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1001 } 1002 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1003 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1004 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1005 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1006 1007 /* copy over the A part */ 1008 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1009 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1010 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1011 1012 for (i=0; i<mbs; i++) { 1013 rvals[0] = bs*(baij->rstartbs + i); 1014 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1015 for (j=ai[i]; j<ai[i+1]; j++) { 1016 col = (baij->cstartbs+aj[j])*bs; 1017 for (k=0; k<bs; k++) { 1018 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1019 col++; a += bs; 1020 } 1021 } 1022 } 1023 /* copy over the B part */ 1024 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1025 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1026 for (i=0; i<mbs; i++) { 1027 rvals[0] = bs*(baij->rstartbs + i); 1028 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1029 for (j=ai[i]; j<ai[i+1]; j++) { 1030 col = baij->garray[aj[j]]*bs; 1031 for (k=0; k<bs; k++) { 1032 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1033 col++; a += bs; 
1034 } 1035 } 1036 } 1037 ierr = PetscFree(rvals);CHKERRQ(ierr); 1038 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1039 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1040 /* 1041 Everyone has to call to draw the matrix since the graphics waits are 1042 synchronized across all processors that share the PetscDraw object 1043 */ 1044 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1045 if (!rank) { 1046 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1047 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1048 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1049 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1050 } 1051 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1052 ierr = MatDestroy(&A);CHKERRQ(ierr); 1053 } 1054 PetscFunctionReturn(0); 1055 } 1056 1057 #undef __FUNCT__ 1058 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1059 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1060 { 1061 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1062 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1063 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 1064 PetscErrorCode ierr; 1065 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1066 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1067 int fd; 1068 PetscScalar *column_values; 1069 FILE *file; 1070 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1071 PetscInt message_count,flowcontrolcount; 1072 1073 PetscFunctionBegin; 1074 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1075 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1076 nz = bs2*(A->nz + B->nz); 1077 rlen = mat->rmap->n; 1078 if 
(!rank) { 1079 header[0] = MAT_FILE_CLASSID; 1080 header[1] = mat->rmap->N; 1081 header[2] = mat->cmap->N; 1082 1083 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1084 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1085 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1086 /* get largest number of rows any processor has */ 1087 range = mat->rmap->range; 1088 for (i=1; i<size; i++) { 1089 rlen = PetscMax(rlen,range[i+1] - range[i]); 1090 } 1091 } else { 1092 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1093 } 1094 1095 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1096 /* compute lengths of each row */ 1097 for (i=0; i<a->mbs; i++) { 1098 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1099 } 1100 /* store the row lengths to the file */ 1101 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1102 if (!rank) { 1103 MPI_Status status; 1104 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1105 rlen = (range[1] - range[0])/bs; 1106 for (i=0; i<rlen; i++) { 1107 for (j=0; j<bs; j++) { 1108 row_lens[i*bs+j] = bs*crow_lens[i]; 1109 } 1110 } 1111 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1112 for (i=1; i<size; i++) { 1113 rlen = (range[i+1] - range[i])/bs; 1114 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1115 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1116 for (k=0; k<rlen; k++) { 1117 for (j=0; j<bs; j++) { 1118 row_lens[k*bs+j] = bs*crow_lens[k]; 1119 } 1120 } 1121 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1122 } 1123 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1124 ierr = PetscFree(row_lens);CHKERRQ(ierr); 
1125 } else { 1126 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1127 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1128 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1129 } 1130 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1131 1132 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1133 information needed to make it for each row from a block row. This does require more communication but still not more than 1134 the communication needed for the nonzero values */ 1135 nzmax = nz; /* space a largest processor needs */ 1136 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1137 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1138 cnt = 0; 1139 for (i=0; i<a->mbs; i++) { 1140 pcnt = cnt; 1141 for (j=B->i[i]; j<B->i[i+1]; j++) { 1142 if ((col = garray[B->j[j]]) > cstart) break; 1143 for (l=0; l<bs; l++) { 1144 column_indices[cnt++] = bs*col+l; 1145 } 1146 } 1147 for (k=A->i[i]; k<A->i[i+1]; k++) { 1148 for (l=0; l<bs; l++) { 1149 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1150 } 1151 } 1152 for (; j<B->i[i+1]; j++) { 1153 for (l=0; l<bs; l++) { 1154 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1155 } 1156 } 1157 len = cnt - pcnt; 1158 for (k=1; k<bs; k++) { 1159 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1160 cnt += len; 1161 } 1162 } 1163 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1164 1165 /* store the columns to the file */ 1166 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1167 if (!rank) { 1168 MPI_Status status; 1169 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1170 for (i=1; i<size; i++) { 
1171 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1172 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1173 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1174 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1175 } 1176 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1177 } else { 1178 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1179 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1180 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1181 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1182 } 1183 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1184 1185 /* load up the numerical values */ 1186 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1187 cnt = 0; 1188 for (i=0; i<a->mbs; i++) { 1189 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1190 for (j=B->i[i]; j<B->i[i+1]; j++) { 1191 if (garray[B->j[j]] > cstart) break; 1192 for (l=0; l<bs; l++) { 1193 for (ll=0; ll<bs; ll++) { 1194 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1195 } 1196 } 1197 cnt += bs; 1198 } 1199 for (k=A->i[i]; k<A->i[i+1]; k++) { 1200 for (l=0; l<bs; l++) { 1201 for (ll=0; ll<bs; ll++) { 1202 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1203 } 1204 } 1205 cnt += bs; 1206 } 1207 for (; j<B->i[i+1]; j++) { 1208 for (l=0; l<bs; l++) { 1209 for (ll=0; ll<bs; ll++) { 1210 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1211 } 1212 } 1213 cnt += bs; 1214 } 1215 cnt += (bs-1)*rlen; 1216 } 1217 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1218 1219 /* store the column values to the file */ 1220 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1221 if (!rank) { 1222 MPI_Status status; 1223 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1224 for (i=1; i<size; i++) { 1225 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1226 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1227 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1228 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1229 } 1230 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1231 } else { 1232 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1233 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1234 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1235 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1236 } 1237 ierr = PetscFree(column_values);CHKERRQ(ierr); 1238 1239 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1240 if (file) { 1241 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1242 } 1243 PetscFunctionReturn(0); 1244 } 1245 1246 #undef __FUNCT__ 1247 #define __FUNCT__ "MatView_MPIBAIJ" 1248 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1249 { 1250 PetscErrorCode ierr; 1251 PetscBool iascii,isdraw,issocket,isbinary; 1252 1253 PetscFunctionBegin; 1254 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1255 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1256 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1257 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1258 if (iascii || isdraw || issocket) { 1259 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1260 } else if (isbinary) { 1261 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1262 } 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1268 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1269 { 1270 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 #if defined(PETSC_USE_LOG) 1275 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1276 #endif 1277 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1278 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1279 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1280 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1281 #if defined(PETSC_USE_CTABLE) 1282 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1283 #else 1284 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1285 #endif 1286 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1287 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1288 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1289 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1290 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1291 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1292 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1293 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1294 1295 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr); 1299 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",NULL);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatMult_MPIBAIJ" 1310 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1311 { 1312 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1313 PetscErrorCode ierr; 1314 PetscInt nt; 1315 1316 PetscFunctionBegin; 1317 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1318 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1319 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1320 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1321 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1322 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1323 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1324 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1325 PetscFunctionReturn(0); 1326 } 1327 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1330 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1331 { 1332 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1333 PetscErrorCode ierr; 1334 1335 PetscFunctionBegin; 1336 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1337 ierr = 
(*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1338 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1339 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1345 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1346 { 1347 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1348 PetscErrorCode ierr; 1349 PetscBool merged; 1350 1351 PetscFunctionBegin; 1352 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1353 /* do nondiagonal part */ 1354 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1355 if (!merged) { 1356 /* send it on its way */ 1357 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1358 /* do local part */ 1359 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1360 /* receive remote parts: note this assumes the values are not actually */ 1361 /* inserted in yy until the next line */ 1362 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1363 } else { 1364 /* do local part */ 1365 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1366 /* send it on its way */ 1367 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1368 /* values actually were received in the Begin() but we need to call this nop */ 1369 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1370 } 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1376 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1377 { 1378 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1379 PetscErrorCode ierr; 1380 1381 PetscFunctionBegin; 1382 /* do nondiagonal part */ 1383 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1384 /* send it on its way */ 1385 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1386 /* do local part */ 1387 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1388 /* receive remote parts: note this assumes the values are not actually */ 1389 /* inserted in yy until the next line, which is true for my implementation*/ 1390 /* but is not perhaps always true. */ 1391 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1392 PetscFunctionReturn(0); 1393 } 1394 1395 /* 1396 This only works correctly for square matrices where the subblock A->A is the 1397 diagonal block 1398 */ 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1401 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1402 { 1403 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1404 PetscErrorCode ierr; 1405 1406 PetscFunctionBegin; 1407 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1408 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatScale_MPIBAIJ" 1414 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1415 { 1416 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1417 PetscErrorCode ierr; 1418 1419 PetscFunctionBegin; 1420 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1421 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1427 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1428 { 1429 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1430 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1431 PetscErrorCode ierr; 1432 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1433 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1434 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1435 1436 
PetscFunctionBegin; 1437 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1438 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1439 mat->getrowactive = PETSC_TRUE; 1440 1441 if (!mat->rowvalues && (idx || v)) { 1442 /* 1443 allocate enough space to hold information from the longest row. 1444 */ 1445 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1446 PetscInt max = 1,mbs = mat->mbs,tmp; 1447 for (i=0; i<mbs; i++) { 1448 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1449 if (max < tmp) max = tmp; 1450 } 1451 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1452 } 1453 lrow = row - brstart; 1454 1455 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1456 if (!v) {pvA = 0; pvB = 0;} 1457 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1458 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1459 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1460 nztot = nzA + nzB; 1461 1462 cmap = mat->garray; 1463 if (v || idx) { 1464 if (nztot) { 1465 /* Sort by increasing column numbers, assuming A and B already sorted */ 1466 PetscInt imark = -1; 1467 if (v) { 1468 *v = v_p = mat->rowvalues; 1469 for (i=0; i<nzB; i++) { 1470 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1471 else break; 1472 } 1473 imark = i; 1474 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1475 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1476 } 1477 if (idx) { 1478 *idx = idx_p = mat->rowindices; 1479 if (imark > -1) { 1480 for (i=0; i<imark; i++) { 1481 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1482 } 1483 } else { 1484 for (i=0; i<nzB; i++) { 1485 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1486 else break; 1487 } 1488 imark = i; 1489 } 1490 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1491 for (i=imark; i<nzB; 
i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1492 } 1493 } else { 1494 if (idx) *idx = 0; 1495 if (v) *v = 0; 1496 } 1497 } 1498 *nz = nztot; 1499 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1500 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1506 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1507 { 1508 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1509 1510 PetscFunctionBegin; 1511 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1512 baij->getrowactive = PETSC_FALSE; 1513 PetscFunctionReturn(0); 1514 } 1515 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1518 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1519 { 1520 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1521 PetscErrorCode ierr; 1522 1523 PetscFunctionBegin; 1524 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1525 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1526 PetscFunctionReturn(0); 1527 } 1528 1529 #undef __FUNCT__ 1530 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1531 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1532 { 1533 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1534 Mat A = a->A,B = a->B; 1535 PetscErrorCode ierr; 1536 PetscReal isend[5],irecv[5]; 1537 1538 PetscFunctionBegin; 1539 info->block_size = (PetscReal)matin->rmap->bs; 1540 1541 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1542 1543 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1544 isend[3] = info->memory; isend[4] = info->mallocs; 1545 1546 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1547 1548 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1549 isend[3] += info->memory; isend[4] += info->mallocs; 1550 1551 if (flag == 
MAT_LOCAL) { 1552 info->nz_used = isend[0]; 1553 info->nz_allocated = isend[1]; 1554 info->nz_unneeded = isend[2]; 1555 info->memory = isend[3]; 1556 info->mallocs = isend[4]; 1557 } else if (flag == MAT_GLOBAL_MAX) { 1558 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1559 1560 info->nz_used = irecv[0]; 1561 info->nz_allocated = irecv[1]; 1562 info->nz_unneeded = irecv[2]; 1563 info->memory = irecv[3]; 1564 info->mallocs = irecv[4]; 1565 } else if (flag == MAT_GLOBAL_SUM) { 1566 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1567 1568 info->nz_used = irecv[0]; 1569 info->nz_allocated = irecv[1]; 1570 info->nz_unneeded = irecv[2]; 1571 info->memory = irecv[3]; 1572 info->mallocs = irecv[4]; 1573 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1574 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1575 info->fill_ratio_needed = 0; 1576 info->factor_mallocs = 0; 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1582 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1583 { 1584 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1585 PetscErrorCode ierr; 1586 1587 PetscFunctionBegin; 1588 switch (op) { 1589 case MAT_NEW_NONZERO_LOCATIONS: 1590 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1591 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1592 case MAT_KEEP_NONZERO_PATTERN: 1593 case MAT_NEW_NONZERO_LOCATION_ERR: 1594 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1595 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1596 break; 1597 case MAT_ROW_ORIENTED: 1598 a->roworiented = flg; 1599 1600 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1601 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1602 break; 1603 case MAT_NEW_DIAGONALS: 1604 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1605 break; 
  /* (continuation of MatSetOption_MPIBAIJ -- the switch opening is above this chunk) */
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    /* symmetry-type options are forwarded to the local diagonal block only */
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
/*
   Transpose of a parallel block AIJ matrix, built by inserting each local
   block row of A as (point) columns of the result B via MatSetValues_MPIBAIJ().
   In-place transpose (A == *matout) is only allowed for square matrices.
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* create B with the row/column layouts of A swapped */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    /* rvals[] holds the bs consecutive global (point) row indices of block row i */
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        /* one point-column of this block goes in as a point-row of B (i.e. transposed) */
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);

        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* garray[] maps compressed off-diagonal block columns to global block columns */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
  else {
    /* in-place: replace A's data with B's and discard the shell of B */
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
/*
   Computes mat = diag(ll) * mat * diag(rr); either vector may be NULL.
   The right scaling of the off-diagonal block needs the ghosted form of rr,
   hence the scatter into baij->lvec.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a     = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block while the scatter of rr is in flight */
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
/*
   Zeros the given global (point) rows, optionally placing diag on the diagonal
   and fixing b so that x keeps its value in those rows.  Rows owned by other
   processes are forwarded to their owners with a hand-rolled Irecv/Isend
   rendezvous (continues on the following lines of this chunk).
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
  PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j    = 0;
  for (i=0;
i<N; i++) {
    /* linear scan over owner ranges; j is retained across iterations because
       rows[] is typically sorted -- reset j only when the sequence decreases */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;         /* number of rows destined for process j */
        nprocs[2*j+1] = 1;     /* flag: at least one message to process j */
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  if (A->nooffproczerorows) {
    /* user promised all zeroed rows are local; the only possible send is to self */
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length*/
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  /* bucket the row indices by destination process */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* rebuild starts[] (the bucketing loop above consumed it) */
  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* convert received global row indices to local (point) indices */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: the diagonal entries may fall outside l->A,
       so insert them one at a time through the full MatSetValues() path */
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }

  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
/* Marks only the diagonal block as unfactored (the off-diagonal block is never factored here). */
PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIBAIJ"
PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat
B,PetscBool *flag)
{
  Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  /* compare diagonal blocks; only bother with off-diagonal blocks if those match */
  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  /* matrices are equal only if every process found its local parts equal */
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIBAIJ"
PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    /* fast path: copy diagonal and off-diagonal blocks directly */
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIBAIJ"
/* Default setup: preallocate with default nonzero estimates and the matrix's block size. */
PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIBAIJ"
/* Y = a*X + Y.  With SAME_NONZERO_PATTERN the nonzero arrays line up
   entry-for-entry, so a single BLAS axpy per block suffices. */
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIBAIJ"
PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
/* Extracts a parallel submatrix.  The column IS is gathered onto every process
   and cached on the result under "ISAllGather" so MAT_REUSE_MATRIX calls can
   find it again. */
PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* composing keeps a reference, so the local destroy is safe */
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
/*
    Not great since it makes two copies of the submatrix, first an SeqBAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
*/
PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;
  PetscBool      idflag,allrows, allcols;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* The compression and expansion should be avoided.
Doesn't point
     out errors, might change the indices, hence buggey */
  /* convert point index sets to block index sets for the BAIJ extraction kernel */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
  if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
  else allcols = PETSC_FALSE;

  ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
  if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
  else allrows = PETSC_FALSE;

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
    m - number of local rows
    n - number of columns (same on all processors)
    rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  /* from here on m and n are in block units */
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        /* distribute the n block columns as evenly as possible */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;    /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* values below are passed column-major (one block at a time) */
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz*bs*bs;   /* each block holds bs*bs scalars */
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIBAIJ"
/*
   Permutes rows and columns of A into *B by building a collective, inverted
   row permutation and then extracting the permuted matrix as a submatrix
   (continues on the following lines of this chunk).
*/
PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,rlocal_size,clocal_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(&crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  ierr = ISDestroy(&growp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(&irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
  }
  /* clean up */
  ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetGhosts_MPIBAIJ"
/* Returns the number of ghost block columns and the global indices they map to. */
PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts) *ghosts = baij->garray;
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
/*
    This routine is almost identical to MatFDColoringCreate_MPIBAIJ()!
*/
PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
{
  Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode         ierr;
  PetscMPIInt            size,*ncolsonproc,*disp,nn;
  PetscInt               bs,i,n,nrows,j,k,m,ncols,col;
  const PetscInt         *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj,*ltog;
  PetscInt               nis = iscoloring->n,nctot,*cols;
  PetscInt               *rowhit,M,cstart,cend,colb;
  PetscInt               *columnsforrow,l;
  IS                     *isa;
  PetscBool              done,flg;
  ISLocalToGlobalMapping map = mat->cmap->bmapping;
  PetscInt               ctype=c->ctype;

  PetscFunctionBegin;
  if (!mat->assembled) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
  if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");

  if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);}
  else ltog = NULL;
  ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);

  /* all sizes below are in block units */
  M         = mat->rmap->n/bs;
  cstart    = mat->cmap->rstart/bs;
  cend      = mat->cmap->rend/bs;
  c->M      = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
  c->N      = mat->cmap->N/bs;
  c->m      = mat->rmap->n/bs;
  c->rstart = mat->rmap->rstart/bs;

  c->ncolors = nis;
  ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
  ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
  ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
  ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
  ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
  ierr       = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);

  /* Allow access to data structures of local part of matrix */
  if (!baij->colmap) {
    ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
  }
  ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
  ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);

  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);

  for (i=0; i<nis; i++) {
    ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
    ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);

    c->ncolumns[i] = n;
    if (n) {
      ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
      ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
    } else {
      c->columns[i] = 0;
    }

    if (ctype == IS_COLORING_GLOBAL) {
      /* Determine the total (parallel) number of columns of this color */
      ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
      ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);

      ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
      ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
      if (!nctot) {
        ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
      }

      disp[0] = 0;
      for (j=1; j<size; j++) {
        disp[j] = disp[j-1] + ncolsonproc[j-1];
      }

      /* Get complete list of columns for color on each processor */
      ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
      ierr =
MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
    } else if (ctype == IS_COLORING_GHOSTED) {
      /* Determine local number of columns of this color on this process, including ghost points */
      nctot = n;
      ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
      ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");

    /*
       Mark all rows affect by these columns
    */
    /* Temporary option to allow for debugging/testing */
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
    if (!flg) { /*-----------------------------------------------------------------------------*/
      /* crude, fast version */
      ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
      /* loop over columns*/
      for (j=0; j<nctot; j++) {
        if (ctype == IS_COLORING_GHOSTED) {
          col = ltog[cols[j]];   /* translate local (ghosted) index to global */
        } else {
          col = cols[j];
        }
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          rows = A_cj + A_ci[col-cstart];
          m    = A_ci[col-cstart+1] - A_ci[col-cstart];
        } else {
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb--;
#else
          colb = baij->colmap[col] - 1;
#endif
          if (colb == -1) {
            m = 0;   /* this global column has no entries in the local off-diagonal block */
          } else {
            colb = colb/bs;
            rows = B_cj + B_ci[colb];
            m    = B_ci[colb+1] - B_ci[colb];
          }
        }
        /* loop over columns marking them in rowhit */
        /* store col+1 so that 0 can mean "row not hit" */
        for (k=0; k<m; k++) {
          rowhit[*rows++] = col + 1;
        }
      }

      /* count the number of hits */
      nrows = 0;
      for (j=0; j<M; j++) {
        if (rowhit[j]) nrows++;
      }
      c->nrows[i] = nrows;
      ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
      ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
      ierr        = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
      nrows       = 0;
      for (j=0; j<M; j++) {
        if (rowhit[j]) {
          c->rows[i][nrows]          = j;
          c->columnsforrow[i][nrows] = rowhit[j] - 1;   /* undo the +1 offset */
          nrows++;
        }
      }
    } else { /*-------------------------------------------------------------------------------*/
      /* slow version, using rowhit as a linked list */
      PetscInt currentcol,fm,mfm;
      rowhit[M] = M;   /* sentinel: list head lives at rowhit[M] */
      nrows     = 0;
      /* loop over columns*/
      for (j=0; j<nctot; j++) {
        if (ctype == IS_COLORING_GHOSTED) {
          col = ltog[cols[j]];
        } else {
          col = cols[j];
        }
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          rows = A_cj + A_ci[col-cstart];
          m    = A_ci[col-cstart+1] - A_ci[col-cstart];
        } else {
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb--;
#else
          colb = baij->colmap[col] - 1;
#endif
          if (colb == -1) {
            m = 0;
          } else {
            colb = colb/bs;
            rows = B_cj + B_ci[colb];
            m    = B_ci[colb+1] - B_ci[colb];
          }
        }

        /* loop over columns marking them in rowhit */
        fm = M; /* fm points to first entry in linked list */
        for (k=0; k<m; k++) {
          currentcol = *rows++;
          /* is it already in the list? */
          do {
            mfm = fm;
            fm  = rowhit[fm];
          } while (fm < currentcol);
          /* not in list so add it */
          if (fm != currentcol) {
            nrows++;
            columnsforrow[currentcol] = col;
            /* next three lines insert new entry into linked list */
            rowhit[mfm]        = currentcol;
            rowhit[currentcol] = fm;
            fm                 = currentcol;
            /* fm points to present position in list since we know the columns are sorted */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
        }
      }
      c->nrows[i] = nrows;
      ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
      ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
      ierr        = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
      /* now store the linked list of rows into c->rows[i] */
      nrows = 0;
      fm    = rowhit[M];
      do {
        c->rows[i][nrows]            = fm;
        c->columnsforrow[i][nrows++] = columnsforrow[fm];
        fm = rowhit[fm];
      } while (fm < M);
    } /* ---------------------------------------------------------------------------------------*/
    ierr = PetscFree(cols);CHKERRQ(ierr);
  }

  /* Optimize by adding the vscale, and scaleforrow[][] fields */
  /*
       vscale will contain the "diagonal" on processor scalings followed by the off processor
  */
  if (ctype == IS_COLORING_GLOBAL) {
    PetscInt *garray;
    /* expand the block-column garray to point columns for the ghosted vector */
    ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
    for (i=0; i<baij->B->cmap->n/bs; i++) {
      for (j=0; j<bs; j++) {
        garray[i*bs+j] = bs*baij->garray[i]+j;
      }
    }
    ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
    ierr = PetscFree(garray);CHKERRQ(ierr);
    CHKMEMQ;
    ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr =
PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2493 for (l=0; l<c->nrows[k]; l++) { 2494 col = c->columnsforrow[k][l]; 2495 if (col >= cstart && col < cend) { 2496 /* column is in diagonal block of matrix */ 2497 colb = col - cstart; 2498 } else { 2499 /* column is in "off-processor" part */ 2500 #if defined(PETSC_USE_CTABLE) 2501 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2502 colb--; 2503 #else 2504 colb = baij->colmap[col] - 1; 2505 #endif 2506 colb = colb/bs; 2507 colb += cend - cstart; 2508 } 2509 c->vscaleforrow[k][l] = colb; 2510 } 2511 } 2512 } else if (ctype == IS_COLORING_GHOSTED) { 2513 /* Get gtol mapping */ 2514 PetscInt N = mat->cmap->N,nlocal,*gtol; 2515 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2516 for (i=0; i<N; i++) gtol[i] = -1; 2517 ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr); 2518 for (i=0; i<nlocal; i++) gtol[ltog[i]] = i; 2519 2520 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2521 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2522 for (k=0; k<c->ncolors; k++) { 2523 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2524 for (l=0; l<c->nrows[k]; l++) { 2525 col = c->columnsforrow[k][l]; /* global column index */ 2526 2527 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2528 } 2529 } 2530 ierr = PetscFree(gtol);CHKERRQ(ierr); 2531 } 2532 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2533 2534 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2535 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2536 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2537 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2538 if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,<og);CHKERRQ(ierr);} 2539 CHKMEMQ; 2540 PetscFunctionReturn(0); 2541 } 2542 2543 #undef 
__FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
/* Builds, on EVERY rank, a sequential AIJ matrix containing the nonzero structure
   (in block rows/columns) of the entire parallel BAIJ matrix; numerical values are
   not set. All ranks end up with identical copies via two MPI_Allgatherv calls. */
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  /* lens[] has one entry per block row of the GLOBAL matrix; each rank fills its
     own rows with diagonal (ad) + off-diagonal (bd) counts, then all exchange */
  ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  /* one allocation serves both arrays: first half recvcounts, second half displs */
  ierr   = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
  displs = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];  /* this rank's slice of the global j array */
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  /* merge ad (diagonal block, local numbering) and bd (off-diagonal, compressed
     numbering mapped through garray) so each row comes out in ascending global order */
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  /* propagate symmetry flags so downstream algorithms can exploit them */
  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIBAIJ"
/* Parallel SOR relaxation. Only the SOR_LOCAL_* (processor-local block Jacobi style)
   variants and SOR_APPLY_UPPER are supported; a true parallel sweep errors out.
   Each outer iteration scatters in the off-processor part of x, forms
   bb1 = bb - B*x, then runs the requested local sweep on the diagonal block. */
PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;   /* holds bb - B*x; created lazily below */

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is needed unless exactly one iteration starts from a zero guess;
     note: ~flag & SOR_ZERO_INITIAL_GUESS is the bitwise spelling of
     !(flag & SOR_ZERO_INITIAL_GUESS) */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with x == 0 we have B*x == 0, so bb can be used directly for the first sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* bring in the off-processor entries of x */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);  /* VecDestroy(0) is a no-op when bb1 was never created */
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);

#undef __FUNCT__
#define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
/* The block diagonal lives entirely in the local diagonal part a->A, so delegate. */
PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


/* -------------------------------------------------------------------*/
/* Function table for MATMPIBAIJ; the /*NN*/ markers give the struct _MatOps slot. */
static struct _MatOps MatOps_Values = {
       MatSetValues_MPIBAIJ,
       MatGetRow_MPIBAIJ,
       MatRestoreRow_MPIBAIJ,
       MatMult_MPIBAIJ,
/* 4*/ MatMultAdd_MPIBAIJ,
       MatMultTranspose_MPIBAIJ,
       MatMultTransposeAdd_MPIBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPIBAIJ,
       MatTranspose_MPIBAIJ,
/*15*/ MatGetInfo_MPIBAIJ,
       MatEqual_MPIBAIJ,
       MatGetDiagonal_MPIBAIJ,
       MatDiagonalScale_MPIBAIJ,
       MatNorm_MPIBAIJ,
/*20*/ MatAssemblyBegin_MPIBAIJ,
       MatAssemblyEnd_MPIBAIJ,
       MatSetOption_MPIBAIJ,
       MatZeroEntries_MPIBAIJ,
/*24*/ MatZeroRows_MPIBAIJ,
       0,
       0,
       0,
       0,
/*29*/ MatSetUp_MPIBAIJ,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIBAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIBAIJ,
       MatGetSubMatrices_MPIBAIJ,
       MatIncreaseOverlap_MPIBAIJ,
       MatGetValues_MPIBAIJ,
       MatCopy_MPIBAIJ,
/*44*/ 0,
       MatScale_MPIBAIJ,
       0,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIBAIJ,
       0,
       MatSetUnfactored_MPIBAIJ,
       MatPermute_MPIBAIJ,
       MatSetValuesBlocked_MPIBAIJ,
/*59*/ MatGetSubMatrix_MPIBAIJ,
MatDestroy_MPIBAIJ,
       MatView_MPIBAIJ,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIBAIJ,
       0,
       0,
       0,
       0,
/*74*/ 0,
       MatFDColoringApply_BAIJ,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_MPIBAIJ,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       0,
       0,
       0,
/*94*/ 0,
       0,
       0,
       0,
       0,
/*99*/ 0,
       0,
       0,
       0,
       0,
/*104*/0,
       MatRealPart_MPIBAIJ,
       MatImaginaryPart_MPIBAIJ,
       0,
       0,
/*109*/0,
       0,
       0,
       0,
       0,
/*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
       0,
       MatGetGhosts_MPIBAIJ,
       0,
       0,
/*119*/0,
       0,
       0,
       0,
       0,
/*124*/0,
       0,
       MatInvertBlockDiagonal_MPIBAIJ
};

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
/* Returns (without copying) the sequential matrix holding this rank's diagonal block. */
PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPIBAIJ*)A->data)->A;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
/* CSR-driven preallocation: ii/jj describe the local block rows in block-CSR form
   (jj must be sorted within each row). Counts diagonal vs off-diagonal block
   columns per row, preallocates accordingly, then inserts the (optional) values
   row by row and assembles. */
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,d,nz,nz_max = 0,*d_nnz = 0,*o_nnz = 0;
  const PetscInt *JJ     = 0;
  PetscScalar    *values = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* the layout may adjust bs; re-fetch it before computing block counts */
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    JJ     = jj + ii[i];
    /* skip block columns left of the diagonal block (relies on sorted jj) */
    for (j=0; j<nz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    /* count block columns inside [cstart,cend) — the diagonal portion */
    d = 0;
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert zeros so the nonzero pattern is still created */
    ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
    ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
/*@C
   MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatches to the type-specific implementation composed on B (no-op for other types) */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
/* Standard (d_nz/d_nnz, o_nz/o_nnz) preallocation: validates the counts, fills in
   the block-size-derived bookkeeping in Mat_MPIBAIJ, creates the sequential
   diagonal (b->A) and off-diagonal (b->B) parts on first call, and forwards the
   preallocation to them. */
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;

  PetscFunctionBegin;
  /* remember whether the caller supplied real preallocation information (see below) */
  if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
  if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;

  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr =
PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* the layout may adjust bs; re-fetch it before deriving block counts */
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);

  /* negative per-row counts are caller errors */
  if (d_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
    }
  }

  /* cache block-scaled sizes and ownership ranges */
  b      = (Mat_MPIBAIJ*)B->data;
  b->bs2 = bs*bs;
  b->mbs = B->rmap->n/bs;
  b->nbs = B->cmap->n/bs;
  b->Mbs = B->rmap->N/bs;
  b->Nbs = B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;
  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  if (!B->preallocated) {
    /* first preallocation: create the sequential diagonal and off-diagonal parts
       and the stash used for off-processor blocked insertions */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
  if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
  if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);

#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
/* Builds an MPIAdj (adjacency-structure-only) matrix from the block structure:
   one vertex per block row, global block-column indices in ascending order,
   diagonal entries removed. The ii/jj arrays are handed off to MatCreateMPIAdj. */
PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  ierr  = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
  ii[0] = 0;
  CHKMEMQ;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
    CHKMEMQ;
  }
  ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    /* off-diagonal block columns that precede this rank's diagonal block */
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
      CHKMEMQ;
    }
    /* diagonal-block columns, skipping the diagonal entry itself */
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
        CHKMEMQ;
      }
    }
    /* remaining off-diagonal block columns */
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
      CHKMEMQ;
    }
  }
  /* MatCreateMPIAdj takes ownership of ii and jj — do not free them here */
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/impls/aij/mpi/mpiaij.h>

PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
/* Converts by creating an MPIAIJ shell and replacing its sequential parts with
   AIJ conversions of this matrix's diagonal and off-diagonal pieces.
   MatDisAssemble_MPIBAIJ temporarily modifies A, so A is re-assembled below. */
PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*) B->data;

  /* discard the freshly created sequential parts and substitute converted ones */
  ierr = MatDestroy(&b->A);CHKERRQ(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* restore A, which MatDisAssemble_MPIBAIJ left disassembled */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_MUMPS)
PETSC_EXTERN PetscErrorCode
MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
#endif

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
. -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact>

  Level: beginner

.seealso: MatCreateMPIBAIJ
M*/

PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
/* Type constructor for MATMPIBAIJ: installs the MatOps function table, zero/default
   initializes the Mat_MPIBAIJ context, processes -mat_use_hash_table, and composes
   the type-specific methods that are looked up by name (PetscTryMethod etc.). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;   /* factors <= 1.0 are invalid; fall back to the default */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C","MatConvert_MPIBAIJ_MPIAdj",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C","MatConvert_MPIBAIJ_MPIAIJ",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C","MatConvert_MPIBAIJ_MPISBAIJ",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIBAIJ",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIBAIJ",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIBAIJ",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C","MatMPIBAIJSetPreallocation_MPIBAIJ",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C","MatMPIBAIJSetPreallocationCSR_MPIBAIJ",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIBAIJ",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C","MatSetHashTableFactor_MPIBAIJ",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C","MatConvert_MPIBAIJ_MPIBSTRM",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the in diagonal portion of the local (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatches to the type-specific implementation composed on B (no-op for other types) */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateBAIJ"
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
      This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.
   d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the in diagonal portion of the local (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_block_size - size of the blocks to use
-  -mat_use_hash_table <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o  o  o
   row 4  | o o o d d d o o o o  o  o
   row 5  | o o o d d d o o o o  o  o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    /* parallel run: parallel BAIJ format with diagonal/off-diagonal split */
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    /* uniprocessor run: sequential format, no off-diagonal part exists */
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
/* Duplicates an MPIBAIJ matrix: copies sizes, layouts, block-size bookkeeping
   and hash-table options, then duplicates the diagonal (A) and off-diagonal (B)
   sequential blocks (values copied or not depending on cpvalues). */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-instance scratch state starts out empty in the duplicate */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: options are inherited, the table itself is rebuilt lazily */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
/* Loads an MPIBAIJ matrix from a PETSc binary viewer.  Process 0 reads the
   header, the row lengths, the column indices and the values, and ships each
   process its piece; extra rows (with a single unit entry) are appended so the
   global size is divisible by the block size. */
PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
  PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* only process 0 touches the file */
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID)
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  /* treat the sizes as unset only if the user set none of them */
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* User set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax *= bs;
  } else mmax = m;

  /* turn per-process block-row counts into block-row (rowners) and row (browners) offsets */
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    mend = m;
    if (size == 1) mend = mend - extra_rows;
    ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
    for (j=mend; j<m; j++) locrowlens[j] = 1; /* each padding row holds exactly one entry */
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += locrowlens[j];
    }
    for (i=1; i<size; i++) {
      mend = browners[i+1] - browners[i];
      if (i == size-1) mend = mend - extra_rows; /* padding rows are not on disk */
      ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
      for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<browners[i+1]-browners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; /* padding entries go in columns M..M+extra_rows-1 */
    }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block column of this entry */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  }
  ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; /* padding entries get value 1.0 */
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc (loop above left i == size-1) */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
     MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat - the matrix
.
fact - factor 3839 3840 Not Collective, each process can use a different factor 3841 3842 Level: advanced 3843 3844 Notes: 3845 This can also be set by the command line option: -mat_use_hash_table <fact> 3846 3847 .keywords: matrix, hashtable, factor, HT 3848 3849 .seealso: MatSetOption() 3850 @*/ 3851 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3852 { 3853 PetscErrorCode ierr; 3854 3855 PetscFunctionBegin; 3856 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3857 PetscFunctionReturn(0); 3858 } 3859 3860 #undef __FUNCT__ 3861 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3862 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3863 { 3864 Mat_MPIBAIJ *baij; 3865 3866 PetscFunctionBegin; 3867 baij = (Mat_MPIBAIJ*)mat->data; 3868 baij->ht_fact = fact; 3869 PetscFunctionReturn(0); 3870 } 3871 3872 #undef __FUNCT__ 3873 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3874 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3875 { 3876 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3877 3878 PetscFunctionBegin; 3879 *Ad = a->A; 3880 *Ao = a->B; 3881 *colmap = a->garray; 3882 PetscFunctionReturn(0); 3883 } 3884 3885 /* 3886 Special version for direct calls from Fortran (to eliminate two function call overheads 3887 */ 3888 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3889 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3890 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3891 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3892 #endif 3893 3894 #undef __FUNCT__ 3895 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3896 /*@C 3897 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3898 3899 Collective on Mat 3900 3901 Input Parameters: 3902 + mat - the matrix 3903 . min - number of input rows 3904 . im - input rows 3905 . nin - number of input columns 3906 . in - input columns 3907 . 
 v - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.

  Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat             mat = *matin;
  PetscInt        m = *min, n = *nin;
  InsertMode      addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray = baij->barray;
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  /* lazily allocated bs x bs scratch block, kept on the matrix for reuse */
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned block row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in the diagonal part */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          /* block column in the off-diagonal part: translate to local index */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column not present in current off-diagonal structure: disassemble and use global index */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* row owned by another process: stash for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
     MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
         CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4066 4067 .keywords: matrix, aij, compressed row, sparse, parallel 4068 4069 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4070 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4071 @*/ 4072 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4073 { 4074 PetscErrorCode ierr; 4075 4076 PetscFunctionBegin; 4077 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4078 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4079 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4080 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4081 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 4082 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4083 PetscFunctionReturn(0); 4084 } 4085