1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat); 7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 17 { 18 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 19 PetscErrorCode ierr; 20 PetscInt i,*idxb = 0; 21 PetscScalar *va,*vb; 22 Vec vtmp; 23 24 PetscFunctionBegin; 25 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 26 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 27 if (idx) { 28 for (i=0; i<A->rmap->n; i++) { 29 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 30 } 31 } 32 33 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 34 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 35 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 36 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 37 38 for (i=0; i<A->rmap->n; i++) { 39 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 40 va[i] = vb[i]; 41 if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs); 42 } 43 } 44 45 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 46 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 47 ierr = PetscFree(idxb);CHKERRQ(ierr); 48 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 52 EXTERN_C_BEGIN 53 #undef __FUNCT__ 54 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 55 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 56 { 57 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 58 PetscErrorCode ierr; 59 60 PetscFunctionBegin; 61 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 62 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 EXTERN_C_END 66 67 EXTERN_C_BEGIN 68 #undef __FUNCT__ 69 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 70 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 71 { 72 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 73 PetscErrorCode ierr; 74 75 PetscFunctionBegin; 76 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 77 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 EXTERN_C_END 81 82 /* 83 Local utility routine that creates a mapping from the global column 84 number to the local number in the off-diagonal part of the local 85 storage of the matrix. This is done in a non scalable way since the 86 length of colmap equals the global matrix length. 
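
   As a small illustrative sketch (the numbers are hypothetical, not part of
   the routine): in the non-CTABLE branch below, colmap[g] stores
   1 + bs*(local block index of global block column g), and 0 means "global
   block column g does not appear in the off-diagonal part B".  For example,
   with bs = 2 and garray = {3,7} this routine sets colmap[3] = 1 and
   colmap[7] = 3, and a caller such as MatSetValues_MPIBAIJ() recovers the
   local offset with

       col = baij->colmap[in[j]/bs] - 1;    (col < 0 means: not present in B)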
87 */ 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 90 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 91 { 92 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 93 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 94 PetscErrorCode ierr; 95 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 96 97 PetscFunctionBegin; 98 #if defined(PETSC_USE_CTABLE) 99 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 100 for (i=0; i<nbs; i++) { 101 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 102 } 103 #else 104 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 105 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 106 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 107 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 108 #endif 109 PetscFunctionReturn(0); 110 } 111 112 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 113 { \ 114 \ 115 brow = row/bs; \ 116 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 117 rmax = aimax[brow]; nrow = ailen[brow]; \ 118 bcol = col/bs; \ 119 ridx = row % bs; cidx = col % bs; \ 120 low = 0; high = nrow; \ 121 while (high-low > 3) { \ 122 t = (low+high)/2; \ 123 if (rp[t] > bcol) high = t; \ 124 else low = t; \ 125 } \ 126 for (_i=low; _i<high; _i++) { \ 127 if (rp[_i] > bcol) break; \ 128 if (rp[_i] == bcol) { \ 129 bap = ap + bs2*_i + bs*cidx + ridx; \ 130 if (addv == ADD_VALUES) *bap += value; \ 131 else *bap = value; \ 132 goto a_noinsert; \ 133 } \ 134 } \ 135 if (a->nonew == 1) goto a_noinsert; \ 136 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 137 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 138 N = nrow++ - 1; \ 139 /* shift up all the later entries in this row */ \ 140 for (ii=N; ii>=_i; ii--) { \ 141 rp[ii+1] = rp[ii]; \ 142 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 143 } \ 144 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 145 rp[_i] = bcol; \ 146 ap[bs2*_i + bs*cidx + ridx] = value; \ 147 a_noinsert:; \ 148 ailen[brow] = nrow; \ 149 } 150 151 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 152 { \ 153 brow = row/bs; \ 154 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 155 rmax = bimax[brow]; nrow = bilen[brow]; \ 156 bcol = col/bs; \ 157 ridx = row % bs; cidx = col % bs; \ 158 low = 0; high = nrow; \ 159 while (high-low > 3) { \ 160 t = (low+high)/2; \ 161 if (rp[t] > bcol) high = t; \ 162 else low = t; \ 163 } \ 164 for (_i=low; _i<high; _i++) { \ 165 if (rp[_i] > bcol) break; \ 166 if (rp[_i] == bcol) { \ 167 bap = ap + bs2*_i + bs*cidx + ridx; \ 168 if (addv == ADD_VALUES) *bap += value; \ 169 else *bap = value; \ 170 goto b_noinsert; \ 171 } \ 172 } \ 173 if (b->nonew == 1) goto b_noinsert; \ 174 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 175 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 176 CHKMEMQ; \ 177 N = nrow++ - 1; \ 178 /* shift up all the later entries in this row */ \ 179 for (ii=N; ii>=_i; ii--) { \ 180 rp[ii+1] = rp[ii]; \ 181 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 182 } \ 183 if (N>=_i) { ierr = 
PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 184 rp[_i] = bcol; \ 185 ap[bs2*_i + bs*cidx + ridx] = value; \ 186 b_noinsert:; \ 187 bilen[brow] = nrow; \ 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "MatSetValues_MPIBAIJ" 192 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 193 { 194 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 195 MatScalar value; 196 PetscBool roworiented = baij->roworiented; 197 PetscErrorCode ierr; 198 PetscInt i,j,row,col; 199 PetscInt rstart_orig=mat->rmap->rstart; 200 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 201 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 202 203 /* Some Variables required in the macro */ 204 Mat A = baij->A; 205 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 206 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 207 MatScalar *aa =a->a; 208 209 Mat B = baij->B; 210 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 211 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 212 MatScalar *ba =b->a; 213 214 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 215 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 216 MatScalar *ap,*bap; 217 218 PetscFunctionBegin; 219 if (v) PetscValidScalarPointer(v,6); 220 for (i=0; i<m; i++) { 221 if (im[i] < 0) continue; 222 #if defined(PETSC_USE_DEBUG) 223 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 224 #endif 225 if (im[i] >= rstart_orig && im[i] < rend_orig) { 226 row = im[i] - rstart_orig; 227 for (j=0; j<n; j++) { 228 if (in[j] >= cstart_orig && in[j] < cend_orig) { 229 col = in[j] - cstart_orig; 230 if (roworiented) value = v[i*n+j]; 231 else value = v[i+j*m]; 232 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 233 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 234 } else if (in[j] < 0) continue; 235 #if defined(PETSC_USE_DEBUG) 236 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 237 #endif 238 else { 239 if (mat->was_assembled) { 240 if (!baij->colmap) { 241 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 242 } 243 #if defined(PETSC_USE_CTABLE) 244 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 245 col = col - 1; 246 #else 247 col = baij->colmap[in[j]/bs] - 1; 248 #endif 249 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 250 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 251 col = in[j]; 252 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 253 B = baij->B; 254 b = (Mat_SeqBAIJ*)(B)->data; 255 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 256 ba =b->a; 257 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 258 else col += in[j]%bs; 259 } else col = in[j]; 260 if (roworiented) value = v[i*n+j]; 261 else value = v[i+j*m]; 262 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 263 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 264 } 265 } 266 } else { 267 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 268 if (!baij->donotstash) { 269 mat->assembled = PETSC_FALSE; 270 if (roworiented) { 271 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 272 } else { 273 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 274 } 275 } 276 } 277 } 278 PetscFunctionReturn(0); 279 } 280 281 #undef __FUNCT__ 282 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 283 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 284 { 285 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 286 const PetscScalar *value; 287 MatScalar *barray = baij->barray; 288 PetscBool roworiented = baij->roworiented; 289 PetscErrorCode ierr; 290 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 291 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 292 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 293 294 PetscFunctionBegin; 295 if (!barray) { 296 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 297 baij->barray = barray; 298 } 299 300 if (roworiented) stepval = (n-1)*bs; 301 else stepval = (m-1)*bs; 302 303 for (i=0; i<m; i++) { 304 if (im[i] < 0) continue; 305 #if defined(PETSC_USE_DEBUG) 306 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 307 #endif 308 if (im[i] >= rstart && im[i] < rend) { 309 row = im[i] - rstart; 310 for (j=0; j<n; j++) { 311 /* If NumCol = 1 then a copy is not required */ 312 if ((roworiented) && (n == 1)) { 313 barray = (MatScalar*)v + i*bs2; 314 } else if ((!roworiented) && (m == 1)) { 315 barray = (MatScalar*)v + j*bs2; 316 } else { /* Here a copy is required */ 317 if (roworiented) { 318 value = v + (i*(stepval+bs) + j)*bs; 319 } else { 320 value = v + (j*(stepval+bs) + i)*bs; 321 } 322 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 323 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 324 barray += bs; 325 } 326 barray -= bs2; 327 } 328 329 if (in[j] >= cstart && in[j] < cend) { 330 col = in[j] - cstart; 331 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 332 } else if (in[j] < 0) continue; 333 #if defined(PETSC_USE_DEBUG) 334 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 335 #endif 336 else { 337 if (mat->was_assembled) { 338 if (!baij->colmap) { 339 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 340 } 341 342 #if defined(PETSC_USE_DEBUG) 343 #if defined(PETSC_USE_CTABLE) 344 { PetscInt data; 345 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 346 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 347 } 348 #else 349 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 350 #endif 351 #endif 352 #if defined(PETSC_USE_CTABLE) 353 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 354 col = (col - 1)/bs; 355 #else 356 col = (baij->colmap[in[j]] - 1)/bs; 357 #endif 358 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 359 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 360 col = in[j]; 361 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 362 } else col = in[j]; 363 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364 } 365 } 366 } else { 367 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though 
MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 368 if (!baij->donotstash) { 369 if (roworiented) { 370 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 371 } else { 372 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 373 } 374 } 375 } 376 } 377 PetscFunctionReturn(0); 378 } 379 380 #define HASH_KEY 0.6180339887 381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 384 #undef __FUNCT__ 385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 387 { 388 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 389 PetscBool roworiented = baij->roworiented; 390 PetscErrorCode ierr; 391 PetscInt i,j,row,col; 392 PetscInt rstart_orig=mat->rmap->rstart; 393 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 394 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 395 PetscReal tmp; 396 MatScalar **HD = baij->hd,value; 397 #if defined(PETSC_USE_DEBUG) 398 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 399 #endif 400 401 PetscFunctionBegin; 402 if (v) PetscValidScalarPointer(v,6); 403 for (i=0; i<m; i++) { 404 #if defined(PETSC_USE_DEBUG) 405 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 406 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 407 #endif 408 row = im[i]; 409 if (row >= rstart_orig && row < rend_orig) { 410 for (j=0; j<n; j++) { 411 col = in[j]; 412 if (roworiented) value = v[i*n+j]; 413 else value = v[i+j*m]; 414 /* Look up PetscInto the Hash Table */ 415 key = (row/bs)*Nbs+(col/bs)+1; 416 h1 = HASH(size,key,tmp); 417 418 419 idx = h1; 420 #if defined(PETSC_USE_DEBUG) 421 insert_ct++; 422 total_ct++; 423 if (HT[idx] != key) { 424 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 425 if (idx == size) { 426 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 427 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 428 } 429 } 430 #else 431 if (HT[idx] != key) { 432 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 433 if (idx == size) { 434 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 435 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 436 } 437 } 438 #endif 439 /* A HASH table entry is found, so insert the values at the correct address */ 440 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 441 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 442 } 443 } else if (!baij->donotstash) { 444 if (roworiented) { 445 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 446 } else { 447 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 448 } 449 } 450 } 451 #if defined(PETSC_USE_DEBUG) 452 baij->ht_total_ct = total_ct; 453 baij->ht_insert_ct = insert_ct; 454 #endif 455 PetscFunctionReturn(0); 456 } 457 458 #undef __FUNCT__ 459 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 460 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt 
im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 461 { 462 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 463 PetscBool roworiented = baij->roworiented; 464 PetscErrorCode ierr; 465 PetscInt i,j,ii,jj,row,col; 466 PetscInt rstart=baij->rstartbs; 467 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 468 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 469 PetscReal tmp; 470 MatScalar **HD = baij->hd,*baij_a; 471 const PetscScalar *v_t,*value; 472 #if defined(PETSC_USE_DEBUG) 473 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 474 #endif 475 476 PetscFunctionBegin; 477 if (roworiented) stepval = (n-1)*bs; 478 else stepval = (m-1)*bs; 479 480 for (i=0; i<m; i++) { 481 #if defined(PETSC_USE_DEBUG) 482 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 483 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 484 #endif 485 row = im[i]; 486 v_t = v + i*nbs2; 487 if (row >= rstart && row < rend) { 488 for (j=0; j<n; j++) { 489 col = in[j]; 490 491 /* Look up into the Hash Table */ 492 key = row*Nbs+col+1; 493 h1 = HASH(size,key,tmp); 494 495 idx = h1; 496 #if defined(PETSC_USE_DEBUG) 497 total_ct++; 498 insert_ct++; 499 if (HT[idx] != key) { 500 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 501 if (idx == size) { 502 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 503 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 504 } 505 } 506 #else 507 if (HT[idx] != key) { 508 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 509 if (idx == size) { 510 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 511 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 512 } 513 } 514 #endif 515 baij_a = HD[idx]; 516 if (roworiented) { 517 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 518 /* value = v + (i*(stepval+bs)+j)*bs; */ 519 value = v_t; 520 v_t += bs; 521 if (addv == ADD_VALUES) { 522 for (ii=0; ii<bs; ii++,value+=stepval) { 523 for (jj=ii; jj<bs2; jj+=bs) { 524 baij_a[jj] += *value++; 525 } 526 } 527 } else { 528 for (ii=0; ii<bs; ii++,value+=stepval) { 529 for (jj=ii; jj<bs2; jj+=bs) { 530 baij_a[jj] = *value++; 531 } 532 } 533 } 534 } else { 535 value = v + j*(stepval+bs)*bs + i*bs; 536 if (addv == ADD_VALUES) { 537 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 538 for (jj=0; jj<bs; jj++) { 539 baij_a[jj] += *value++; 540 } 541 } 542 } else { 543 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 544 for (jj=0; jj<bs; jj++) { 545 baij_a[jj] = *value++; 546 } 547 } 548 } 549 } 550 } 551 } else { 552 if (!baij->donotstash) { 553 if (roworiented) { 554 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 555 } else { 556 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 557 } 558 } 559 } 560 } 561 #if defined(PETSC_USE_DEBUG) 562 baij->ht_total_ct = total_ct; 563 baij->ht_insert_ct = insert_ct; 564 #endif 565 PetscFunctionReturn(0); 566 } 567 568 #undef __FUNCT__ 569 #define __FUNCT__ "MatGetValues_MPIBAIJ" 570 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 571 { 572 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 573 PetscErrorCode ierr; 574 PetscInt bs = mat->rmap->bs,i,j,bsrstart = 
mat->rmap->rstart,bsrend = mat->rmap->rend; 575 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 576 577 PetscFunctionBegin; 578 for (i=0; i<m; i++) { 579 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 580 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 581 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 582 row = idxm[i] - bsrstart; 583 for (j=0; j<n; j++) { 584 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 585 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 586 if (idxn[j] >= bscstart && idxn[j] < bscend) { 587 col = idxn[j] - bscstart; 588 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 589 } else { 590 if (!baij->colmap) { 591 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 592 } 593 #if defined(PETSC_USE_CTABLE) 594 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 595 data--; 596 #else 597 data = baij->colmap[idxn[j]/bs]-1; 598 #endif 599 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 600 else { 601 col = data + idxn[j]%bs; 602 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 603 } 604 } 605 } 606 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 607 } 608 PetscFunctionReturn(0); 609 } 610 611 #undef __FUNCT__ 612 #define __FUNCT__ "MatNorm_MPIBAIJ" 613 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 614 { 615 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 616 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 617 PetscErrorCode ierr; 618 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 619 PetscReal sum = 0.0; 620 MatScalar *v; 621 622 PetscFunctionBegin; 623 if (baij->size == 1) { 624 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 625 } else { 626 if (type == NORM_FROBENIUS) { 627 v = amat->a; 628 nz = amat->nz*bs2; 629 for (i=0; i<nz; i++) { 630 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 631 } 632 v = bmat->a; 633 nz = bmat->nz*bs2; 634 for (i=0; i<nz; i++) { 635 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 636 } 637 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 638 *nrm = PetscSqrtReal(*nrm); 639 } else if (type == NORM_1) { /* max column sum */ 640 PetscReal *tmp,*tmp2; 641 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 642 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 643 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 644 v = amat->a; jj = amat->j; 645 for (i=0; i<amat->nz; i++) { 646 for (j=0; j<bs; j++) { 647 col = bs*(cstart + *jj) + j; /* column index */ 648 for (row=0; row<bs; row++) { 649 tmp[col] += PetscAbsScalar(*v); v++; 650 } 651 } 652 jj++; 653 } 654 v = bmat->a; jj = bmat->j; 655 for (i=0; i<bmat->nz; i++) { 656 for (j=0; j<bs; j++) { 657 col = bs*garray[*jj] + j; 658 for (row=0; row<bs; row++) { 659 tmp[col] += PetscAbsScalar(*v); v++; 660 } 661 } 662 jj++; 663 } 664 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 665 *nrm = 0.0; 666 for (j=0; j<mat->cmap->N; j++) { 667 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 668 } 669 ierr = 
PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table and fills it.
  The table is created only once; if new entries need to be added to the
  matrix then the hash table has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size;
i++) { 794 if (HT[i]) j++; 795 } 796 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 797 #endif 798 PetscFunctionReturn(0); 799 } 800 801 #undef __FUNCT__ 802 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 803 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 804 { 805 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 806 PetscErrorCode ierr; 807 PetscInt nstash,reallocs; 808 InsertMode addv; 809 810 PetscFunctionBegin; 811 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 812 813 /* make sure all processors are either in INSERTMODE or ADDMODE */ 814 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 815 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 816 mat->insertmode = addv; /* in case this processor had no cache */ 817 818 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 819 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 820 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 821 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 822 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 823 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 824 PetscFunctionReturn(0); 825 } 826 827 #undef __FUNCT__ 828 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 829 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 830 { 831 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 832 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 833 PetscErrorCode ierr; 834 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 835 PetscInt *row,*col; 836 PetscBool r1,r2,r3,other_disassembled; 837 MatScalar *val; 838 InsertMode addv = mat->insertmode; 839 PetscMPIInt n; 840 841 PetscFunctionBegin; 842 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 843 if (!baij->donotstash && !mat->nooffprocentries) { 844 while (1) { 845 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 846 if (!flg) break; 847 848 for (i=0; i<n;) { 849 /* Now identify the consecutive vals belonging to the same row */ 850 for (j=i,rstart=row[j]; j<n; j++) { 851 if (row[j] != rstart) break; 852 } 853 if (j < n) ncols = j-i; 854 else ncols = n-i; 855 /* Now assemble all these values with a single function call */ 856 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 857 i = j; 858 } 859 } 860 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 861 /* Now process the block-stash. 
Since the values are stashed column-oriented, 862 set the roworiented flag to column oriented, and after MatSetValues() 863 restore the original flags */ 864 r1 = baij->roworiented; 865 r2 = a->roworiented; 866 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 867 868 baij->roworiented = PETSC_FALSE; 869 a->roworiented = PETSC_FALSE; 870 871 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 872 while (1) { 873 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 874 if (!flg) break; 875 876 for (i=0; i<n;) { 877 /* Now identify the consecutive vals belonging to the same row */ 878 for (j=i,rstart=row[j]; j<n; j++) { 879 if (row[j] != rstart) break; 880 } 881 if (j < n) ncols = j-i; 882 else ncols = n-i; 883 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 884 i = j; 885 } 886 } 887 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 888 889 baij->roworiented = r1; 890 a->roworiented = r2; 891 892 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 893 } 894 895 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 896 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 897 898 /* determine if any processor has disassembled, if so we must 899 also disassemble ourselfs, in order that we may reassemble. */ 900 /* 901 if nonzero structure of submatrix B cannot change then we know that 902 no processor disassembled thus we can skip this stuff 903 */ 904 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 905 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 906 if (mat->was_assembled && !other_disassembled) { 907 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 908 } 909 } 910 911 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 912 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 913 } 914 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 915 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 916 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 917 918 #if defined(PETSC_USE_INFO) 919 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 920 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 921 922 baij->ht_total_ct = 0; 923 baij->ht_insert_ct = 0; 924 } 925 #endif 926 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 927 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 928 929 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 930 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 931 } 932 933 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 934 935 baij->rowvalues = 0; 936 PetscFunctionReturn(0); 937 } 938 939 #include <petscdraw.h> 940 #undef __FUNCT__ 941 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 942 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 943 { 944 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 945 PetscErrorCode ierr; 946 PetscMPIInt size = baij->size,rank = baij->rank; 947 PetscInt bs = mat->rmap->bs; 948 PetscBool iascii,isdraw; 949 PetscViewer sviewer; 950 PetscViewerFormat format; 951 952 PetscFunctionBegin; 953 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 954 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 955 if (iascii) { 956 ierr = 
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 957 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 958 MatInfo info; 959 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 960 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 961 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 962 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 963 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 964 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 965 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 966 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 967 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 968 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 969 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 970 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 971 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } else if (format == PETSC_VIEWER_ASCII_INFO) { 974 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 975 PetscFunctionReturn(0); 976 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 977 PetscFunctionReturn(0); 978 } 979 } 980 981 if (isdraw) { 982 PetscDraw draw; 983 PetscBool isnull; 984 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 985 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 986 } 987 988 if (size == 1) { 989 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 990 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 991 } else { 992 /* assemble the entire matrix onto first processor. */ 993 Mat A; 994 Mat_SeqBAIJ *Aloc; 995 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 996 MatScalar *a; 997 998 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 999 /* Perhaps this should be the type of mat? 
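
      A minimal sketch of that alternative (hypothetical, not what the code
      below does) would mirror MatTranspose_MPIBAIJ() further down and use

          ierr = MatSetType(A,((PetscObject)mat)->type_name);CHKERRQ(ierr);

      instead of hard-wiring MATMPIBAIJ.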
*/ 1000 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1001 if (!rank) { 1002 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1003 } else { 1004 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1005 } 1006 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1007 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1008 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1009 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1010 1011 /* copy over the A part */ 1012 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1013 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1014 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1015 1016 for (i=0; i<mbs; i++) { 1017 rvals[0] = bs*(baij->rstartbs + i); 1018 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1019 for (j=ai[i]; j<ai[i+1]; j++) { 1020 col = (baij->cstartbs+aj[j])*bs; 1021 for (k=0; k<bs; k++) { 1022 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1023 col++; a += bs; 1024 } 1025 } 1026 } 1027 /* copy over the B part */ 1028 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1029 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1030 for (i=0; i<mbs; i++) { 1031 rvals[0] = bs*(baij->rstartbs + i); 1032 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1033 for (j=ai[i]; j<ai[i+1]; j++) { 1034 col = baij->garray[aj[j]]*bs; 1035 for (k=0; k<bs; k++) { 1036 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1037 col++; a += bs; 1038 } 1039 } 1040 } 1041 ierr = PetscFree(rvals);CHKERRQ(ierr); 1042 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1043 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1044 /* 1045 Everyone has to call to draw the matrix since the graphics waits are 1046 synchronized across all processors that share the PetscDraw object 1047 */ 1048 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1049 if (!rank) { 1050 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1051 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1052 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1053 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1054 } 1055 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1056 ierr = MatDestroy(&A);CHKERRQ(ierr); 1057 } 1058 PetscFunctionReturn(0); 1059 } 1060 1061 #undef __FUNCT__ 1062 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1063 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1064 { 1065 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1066 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1067 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 1068 PetscErrorCode ierr; 1069 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1070 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1071 int fd; 1072 PetscScalar *column_values; 1073 FILE *file; 1074 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1075 PetscInt message_count,flowcontrolcount; 1076 1077 PetscFunctionBegin; 1078 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1079 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1080 nz = bs2*(A->nz + B->nz); 1081 rlen = mat->rmap->n; 1082 if (!rank) { 1083 header[0] = MAT_FILE_CLASSID; 1084 header[1] = 
mat->rmap->N; 1085 header[2] = mat->cmap->N; 1086 1087 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1088 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1089 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1090 /* get largest number of rows any processor has */ 1091 range = mat->rmap->range; 1092 for (i=1; i<size; i++) { 1093 rlen = PetscMax(rlen,range[i+1] - range[i]); 1094 } 1095 } else { 1096 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1097 } 1098 1099 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1100 /* compute lengths of each row */ 1101 for (i=0; i<a->mbs; i++) { 1102 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1103 } 1104 /* store the row lengths to the file */ 1105 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1106 if (!rank) { 1107 MPI_Status status; 1108 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1109 rlen = (range[1] - range[0])/bs; 1110 for (i=0; i<rlen; i++) { 1111 for (j=0; j<bs; j++) { 1112 row_lens[i*bs+j] = bs*crow_lens[i]; 1113 } 1114 } 1115 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1116 for (i=1; i<size; i++) { 1117 rlen = (range[i+1] - range[i])/bs; 1118 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1119 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1120 for (k=0; k<rlen; k++) { 1121 for (j=0; j<bs; j++) { 1122 row_lens[k*bs+j] = bs*crow_lens[k]; 1123 } 1124 } 1125 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1126 } 1127 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1128 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1129 } else { 1130 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1131 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1132 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1133 } 1134 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1135 1136 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1137 information needed to make it for each row from a block row. 
This does require more communication but still not more than 1138 the communication needed for the nonzero values */ 1139 nzmax = nz; /* space a largest processor needs */ 1140 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1141 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1142 cnt = 0; 1143 for (i=0; i<a->mbs; i++) { 1144 pcnt = cnt; 1145 for (j=B->i[i]; j<B->i[i+1]; j++) { 1146 if ((col = garray[B->j[j]]) > cstart) break; 1147 for (l=0; l<bs; l++) { 1148 column_indices[cnt++] = bs*col+l; 1149 } 1150 } 1151 for (k=A->i[i]; k<A->i[i+1]; k++) { 1152 for (l=0; l<bs; l++) { 1153 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1154 } 1155 } 1156 for (; j<B->i[i+1]; j++) { 1157 for (l=0; l<bs; l++) { 1158 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1159 } 1160 } 1161 len = cnt - pcnt; 1162 for (k=1; k<bs; k++) { 1163 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1164 cnt += len; 1165 } 1166 } 1167 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1168 1169 /* store the columns to the file */ 1170 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1171 if (!rank) { 1172 MPI_Status status; 1173 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1174 for (i=1; i<size; i++) { 1175 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1176 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1177 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1178 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1179 } 1180 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1181 } else { 1182 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1183 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1184 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1185 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1186 } 1187 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1188 1189 /* load up the numerical values */ 1190 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1191 cnt = 0; 1192 for (i=0; i<a->mbs; i++) { 1193 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1194 for (j=B->i[i]; j<B->i[i+1]; j++) { 1195 if (garray[B->j[j]] > cstart) break; 1196 for (l=0; l<bs; l++) { 1197 for (ll=0; ll<bs; ll++) { 1198 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1199 } 1200 } 1201 cnt += bs; 1202 } 1203 for (k=A->i[i]; k<A->i[i+1]; k++) { 1204 for (l=0; l<bs; l++) { 1205 for (ll=0; ll<bs; ll++) { 1206 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1207 } 1208 } 1209 cnt += bs; 1210 } 1211 for (; j<B->i[i+1]; j++) { 1212 for (l=0; l<bs; l++) { 1213 for (ll=0; ll<bs; ll++) { 1214 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1215 } 1216 } 1217 cnt += bs; 1218 } 1219 cnt += (bs-1)*rlen; 1220 } 1221 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1222 1223 /* store the column values to the file */ 1224 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1225 if (!rank) { 
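    /* rank 0 writes its own values first, then (under flow control) receives each
       other rank's count and buffer in turn and appends them to the file */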
1226 MPI_Status status; 1227 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1228 for (i=1; i<size; i++) { 1229 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1230 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1231 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1232 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1233 } 1234 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1235 } else { 1236 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1237 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1238 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1239 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1240 } 1241 ierr = PetscFree(column_values);CHKERRQ(ierr); 1242 1243 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1244 if (file) { 1245 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1246 } 1247 PetscFunctionReturn(0); 1248 } 1249 1250 #undef __FUNCT__ 1251 #define __FUNCT__ "MatView_MPIBAIJ" 1252 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1253 { 1254 PetscErrorCode ierr; 1255 PetscBool iascii,isdraw,issocket,isbinary; 1256 1257 PetscFunctionBegin; 1258 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1259 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1260 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1261 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1262 if (iascii || isdraw || issocket) { 1263 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1264 } else if (isbinary) { 1265 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1266 } 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1272 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1273 { 1274 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1275 PetscErrorCode ierr; 1276 1277 PetscFunctionBegin; 1278 #if defined(PETSC_USE_LOG) 1279 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1280 #endif 1281 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1282 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1283 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1284 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1285 #if defined(PETSC_USE_CTABLE) 1286 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1287 #else 1288 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1289 #endif 1290 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1291 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1292 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1293 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1294 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1295 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1296 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1297 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1298 1299 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr); 1301 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were
received in the Begin() but we need to call this nop */ 1373 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1374 } 1375 PetscFunctionReturn(0); 1376 } 1377 1378 #undef __FUNCT__ 1379 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1380 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1381 { 1382 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1383 PetscErrorCode ierr; 1384 1385 PetscFunctionBegin; 1386 /* do nondiagonal part */ 1387 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1388 /* send it on its way */ 1389 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1390 /* do local part */ 1391 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1392 /* receive remote parts: note this assumes the values are not actually */ 1393 /* inserted in yy until the next line, which is true for my implementation*/ 1394 /* but is not perhaps always true. */ 1395 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1396 PetscFunctionReturn(0); 1397 } 1398 1399 /* 1400 This only works correctly for square matrices where the subblock A->A is the 1401 diagonal block 1402 */ 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1405 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1406 { 1407 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1408 PetscErrorCode ierr; 1409 1410 PetscFunctionBegin; 1411 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1412 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatScale_MPIBAIJ" 1418 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1419 { 1420 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1421 PetscErrorCode ierr; 1422 1423 PetscFunctionBegin; 1424 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1425 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1431 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1432 { 1433 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1434 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1435 PetscErrorCode ierr; 1436 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1437 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1438 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1439 1440 PetscFunctionBegin; 1441 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1442 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1443 mat->getrowactive = PETSC_TRUE; 1444 1445 if (!mat->rowvalues && (idx || v)) { 1446 /* 1447 allocate enough space to hold information from the longest row. 
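
         Here "longest row" means the largest block-row length
             Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]
         over the local block rows; the PetscMalloc2() below then reserves
         max*bs2 scalars for the values and max*bs2 integers for the indices.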
1448 */ 1449 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1450 PetscInt max = 1,mbs = mat->mbs,tmp; 1451 for (i=0; i<mbs; i++) { 1452 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1453 if (max < tmp) max = tmp; 1454 } 1455 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1456 } 1457 lrow = row - brstart; 1458 1459 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1460 if (!v) {pvA = 0; pvB = 0;} 1461 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1462 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1463 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1464 nztot = nzA + nzB; 1465 1466 cmap = mat->garray; 1467 if (v || idx) { 1468 if (nztot) { 1469 /* Sort by increasing column numbers, assuming A and B already sorted */ 1470 PetscInt imark = -1; 1471 if (v) { 1472 *v = v_p = mat->rowvalues; 1473 for (i=0; i<nzB; i++) { 1474 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1475 else break; 1476 } 1477 imark = i; 1478 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1479 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1480 } 1481 if (idx) { 1482 *idx = idx_p = mat->rowindices; 1483 if (imark > -1) { 1484 for (i=0; i<imark; i++) { 1485 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1486 } 1487 } else { 1488 for (i=0; i<nzB; i++) { 1489 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1490 else break; 1491 } 1492 imark = i; 1493 } 1494 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1495 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1496 } 1497 } else { 1498 if (idx) *idx = 0; 1499 if (v) *v = 0; 1500 } 1501 } 1502 *nz = nztot; 1503 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1504 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1505 PetscFunctionReturn(0); 1506 } 1507 1508 #undef __FUNCT__ 1509 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1510 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1511 { 1512 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1513 1514 PetscFunctionBegin; 1515 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1516 baij->getrowactive = PETSC_FALSE; 1517 PetscFunctionReturn(0); 1518 } 1519 1520 #undef __FUNCT__ 1521 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1522 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1523 { 1524 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1525 PetscErrorCode ierr; 1526 1527 PetscFunctionBegin; 1528 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1529 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1535 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1536 { 1537 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1538 Mat A = a->A,B = a->B; 1539 PetscErrorCode ierr; 1540 PetscReal isend[5],irecv[5]; 1541 1542 PetscFunctionBegin; 1543 info->block_size = (PetscReal)matin->rmap->bs; 1544 1545 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1546 1547 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1548 isend[3] = info->memory; isend[4] = info->mallocs; 1549 1550 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1551 1552 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1553 
isend[3] += info->memory; isend[4] += info->mallocs; 1554 1555 if (flag == MAT_LOCAL) { 1556 info->nz_used = isend[0]; 1557 info->nz_allocated = isend[1]; 1558 info->nz_unneeded = isend[2]; 1559 info->memory = isend[3]; 1560 info->mallocs = isend[4]; 1561 } else if (flag == MAT_GLOBAL_MAX) { 1562 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1563 1564 info->nz_used = irecv[0]; 1565 info->nz_allocated = irecv[1]; 1566 info->nz_unneeded = irecv[2]; 1567 info->memory = irecv[3]; 1568 info->mallocs = irecv[4]; 1569 } else if (flag == MAT_GLOBAL_SUM) { 1570 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1571 1572 info->nz_used = irecv[0]; 1573 info->nz_allocated = irecv[1]; 1574 info->nz_unneeded = irecv[2]; 1575 info->memory = irecv[3]; 1576 info->mallocs = irecv[4]; 1577 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1578 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1579 info->fill_ratio_needed = 0; 1580 info->factor_mallocs = 0; 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1586 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1587 { 1588 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1589 PetscErrorCode ierr; 1590 1591 PetscFunctionBegin; 1592 switch (op) { 1593 case MAT_NEW_NONZERO_LOCATIONS: 1594 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1595 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1596 case MAT_KEEP_NONZERO_PATTERN: 1597 case MAT_NEW_NONZERO_LOCATION_ERR: 1598 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1599 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1600 break; 1601 case MAT_ROW_ORIENTED: 1602 a->roworiented = flg; 1603 1604 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1605 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1606 break; 1607 case MAT_NEW_DIAGONALS: 1608 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1609 break; 1610 case MAT_IGNORE_OFF_PROC_ENTRIES: 1611 a->donotstash = flg; 1612 break; 1613 case MAT_USE_HASH_TABLE: 1614 a->ht_flag = flg; 1615 break; 1616 case MAT_SYMMETRIC: 1617 case MAT_STRUCTURALLY_SYMMETRIC: 1618 case MAT_HERMITIAN: 1619 case MAT_SYMMETRY_ETERNAL: 1620 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1621 break; 1622 default: 1623 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1624 } 1625 PetscFunctionReturn(0); 1626 } 1627 1628 #undef __FUNCT__ 1629 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1630 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1631 { 1632 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1633 Mat_SeqBAIJ *Aloc; 1634 Mat B; 1635 PetscErrorCode ierr; 1636 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1637 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1638 MatScalar *a; 1639 1640 PetscFunctionBegin; 1641 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1642 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1643 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1644 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1645 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1646 /* Do not know preallocation information, but must set block size */ 1647 ierr = 
MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1648 } else { 1649 B = *matout; 1650 } 1651 1652 /* copy over the A part */ 1653 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1654 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1655 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1656 1657 for (i=0; i<mbs; i++) { 1658 rvals[0] = bs*(baij->rstartbs + i); 1659 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1660 for (j=ai[i]; j<ai[i+1]; j++) { 1661 col = (baij->cstartbs+aj[j])*bs; 1662 for (k=0; k<bs; k++) { 1663 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1664 1665 col++; a += bs; 1666 } 1667 } 1668 } 1669 /* copy over the B part */ 1670 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1671 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1672 for (i=0; i<mbs; i++) { 1673 rvals[0] = bs*(baij->rstartbs + i); 1674 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1675 for (j=ai[i]; j<ai[i+1]; j++) { 1676 col = baij->garray[aj[j]]*bs; 1677 for (k=0; k<bs; k++) { 1678 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1679 col++; 1680 a += bs; 1681 } 1682 } 1683 } 1684 ierr = PetscFree(rvals);CHKERRQ(ierr); 1685 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1686 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1687 1688 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1689 else { 1690 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1691 } 1692 PetscFunctionReturn(0); 1693 } 1694 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1697 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1698 { 1699 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1700 Mat a = baij->A,b = baij->B; 1701 PetscErrorCode ierr; 1702 PetscInt s1,s2,s3; 1703 1704 PetscFunctionBegin; 1705 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1706 if (rr) { 1707 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1708 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1709 /* Overlap communication with computation. 
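       The scatter of rr into lvec is only started here; it is completed below,
       after the left scaling of B and the scaling of the diagonal block A,
       so the communication is hidden behind that local work.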
*/ 1710 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1711 } 1712 if (ll) { 1713 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1714 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1715 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1716 } 1717 /* scale the diagonal block */ 1718 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1719 1720 if (rr) { 1721 /* Do a scatter end and then right scale the off-diagonal block */ 1722 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1723 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1724 } 1725 PetscFunctionReturn(0); 1726 } 1727 1728 #undef __FUNCT__ 1729 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1730 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1731 { 1732 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1733 PetscErrorCode ierr; 1734 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1735 PetscInt i,*owners = A->rmap->range; 1736 PetscInt *nprocs,j,idx,nsends,row; 1737 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1738 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1739 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1740 MPI_Comm comm; 1741 MPI_Request *send_waits,*recv_waits; 1742 MPI_Status recv_status,*send_status; 1743 const PetscScalar *xx; 1744 PetscScalar *bb; 1745 #if defined(PETSC_DEBUG) 1746 PetscBool found = PETSC_FALSE; 1747 #endif 1748 1749 PetscFunctionBegin; 1750 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1751 /* first count number of contributors to each processor */ 1752 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1753 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1755 j = 0; 1756 for (i=0; i<N; i++) { 1757 if (lastidx > (idx = rows[i])) j = 0; 1758 lastidx = idx; 1759 for (; j<size; j++) { 1760 if (idx >= owners[j] && idx < owners[j+1]) { 1761 nprocs[2*j]++; 1762 nprocs[2*j+1] = 1; 1763 owner[i] = j; 1764 #if defined(PETSC_DEBUG) 1765 found = PETSC_TRUE; 1766 #endif 1767 break; 1768 } 1769 } 1770 #if defined(PETSC_DEBUG) 1771 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1772 found = PETSC_FALSE; 1773 #endif 1774 } 1775 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 1776 1777 if (A->nooffproczerorows) { 1778 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1779 nrecvs = nsends; 1780 nmax = N; 1781 } else { 1782 /* inform other processors of number of messages and max length*/ 1783 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1784 } 1785 1786 /* post receives: */ 1787 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1788 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1789 for (i=0; i<nrecvs; i++) { 1790 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1791 } 1792 1793 /* do sends: 1794 1) starts[i] gives the starting index in svalues for stuff going to 1795 the ith processor 1796 */ 1797 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1798 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 
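  /*
     Pack the global row indices into svalues grouped by destination rank:
     starts[] is first built as a prefix sum of the per-rank counts and used
     as a moving cursor while packing, then rebuilt so each MPI_Isend below
     can locate its contiguous segment.
  */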
1799 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1800 starts[0] = 0; 1801 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 1802 for (i=0; i<N; i++) { 1803 svalues[starts[owner[i]]++] = rows[i]; 1804 } 1805 1806 starts[0] = 0; 1807 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 1808 count = 0; 1809 for (i=0; i<size; i++) { 1810 if (nprocs[2*i+1]) { 1811 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1812 } 1813 } 1814 ierr = PetscFree(starts);CHKERRQ(ierr); 1815 1816 base = owners[rank]; 1817 1818 /* wait on receives */ 1819 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1820 count = nrecvs; 1821 slen = 0; 1822 while (count) { 1823 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1824 /* unpack receives into our local space */ 1825 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1826 1827 source[imdex] = recv_status.MPI_SOURCE; 1828 lens[imdex] = n; 1829 slen += n; 1830 count--; 1831 } 1832 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1833 1834 /* move the data into the send scatter */ 1835 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1836 count = 0; 1837 for (i=0; i<nrecvs; i++) { 1838 values = rvalues + i*nmax; 1839 for (j=0; j<lens[i]; j++) { 1840 lrows[count++] = values[j] - base; 1841 } 1842 } 1843 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1844 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1845 ierr = PetscFree(owner);CHKERRQ(ierr); 1846 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1847 1848 /* fix right hand side if needed */ 1849 if (x && b) { 1850 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1851 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1852 for (i=0; i<slen; i++) { 1853 bb[lrows[i]] = diag*xx[lrows[i]]; 1854 } 1855 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1856 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1857 } 1858 1859 /* actually zap the local rows */ 1860 /* 1861 Zero the required rows. If the "diagonal block" of the matrix 1862 is square and the user wishes to set the diagonal we use separate 1863 code so that MatSetValues() is not called for each diagonal allocating 1864 new memory, thus calling lots of mallocs and slowing things down. 
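     For a rectangular diagonal block the diagonal entries are instead inserted
     with MatSetValues() afterwards, which requires that insertion of new
     nonzero locations has not been disabled on the matrix.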
1865 1866 */ 1867 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1868 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1869 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1870 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1871 } else if (diag != 0.0) { 1872 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1873 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1874 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1875 for (i=0; i<slen; i++) { 1876 row = lrows[i] + rstart_bs; 1877 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1878 } 1879 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1880 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1881 } else { 1882 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1883 } 1884 1885 ierr = PetscFree(lrows);CHKERRQ(ierr); 1886 1887 /* wait on sends */ 1888 if (nsends) { 1889 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1890 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1891 ierr = PetscFree(send_status);CHKERRQ(ierr); 1892 } 1893 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1894 ierr = PetscFree(svalues);CHKERRQ(ierr); 1895 PetscFunctionReturn(0); 1896 } 1897 1898 #undef __FUNCT__ 1899 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1900 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1901 { 1902 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1903 PetscErrorCode ierr; 1904 1905 PetscFunctionBegin; 1906 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1911 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "MatEqual_MPIBAIJ" 1914 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1915 { 1916 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1917 Mat a,b,c,d; 1918 PetscBool flg; 1919 PetscErrorCode ierr; 1920 1921 PetscFunctionBegin; 1922 a = matA->A; b = matA->B; 1923 c = matB->A; d = matB->B; 1924 1925 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1926 if (flg) { 1927 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1928 } 1929 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1930 PetscFunctionReturn(0); 1931 } 1932 1933 #undef __FUNCT__ 1934 #define __FUNCT__ "MatCopy_MPIBAIJ" 1935 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1936 { 1937 PetscErrorCode ierr; 1938 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1939 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1940 1941 PetscFunctionBegin; 1942 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
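     In that case, or when the nonzero patterns differ, fall back to the
     generic MatCopy_Basic(); otherwise the diagonal and off-diagonal blocks
     are copied directly.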
*/ 1943 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1944 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1945 } else { 1946 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1947 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1948 } 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1954 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1955 { 1956 PetscErrorCode ierr; 1957 1958 PetscFunctionBegin; 1959 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1960 PetscFunctionReturn(0); 1961 } 1962 1963 #undef __FUNCT__ 1964 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1965 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1966 { 1967 PetscErrorCode ierr; 1968 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1969 PetscBLASInt bnz,one=1; 1970 Mat_SeqBAIJ *x,*y; 1971 1972 PetscFunctionBegin; 1973 if (str == SAME_NONZERO_PATTERN) { 1974 PetscScalar alpha = a; 1975 x = (Mat_SeqBAIJ*)xx->A->data; 1976 y = (Mat_SeqBAIJ*)yy->A->data; 1977 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1978 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1979 x = (Mat_SeqBAIJ*)xx->B->data; 1980 y = (Mat_SeqBAIJ*)yy->B->data; 1981 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1982 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1983 } else { 1984 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1985 } 1986 PetscFunctionReturn(0); 1987 } 1988 1989 #undef __FUNCT__ 1990 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1991 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1992 { 1993 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1994 PetscErrorCode ierr; 1995 1996 PetscFunctionBegin; 1997 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1998 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1999 PetscFunctionReturn(0); 2000 } 2001 2002 #undef __FUNCT__ 2003 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2004 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2005 { 2006 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2007 PetscErrorCode ierr; 2008 2009 PetscFunctionBegin; 2010 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2011 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2012 PetscFunctionReturn(0); 2013 } 2014 2015 #undef __FUNCT__ 2016 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2017 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2018 { 2019 PetscErrorCode ierr; 2020 IS iscol_local; 2021 PetscInt csize; 2022 2023 PetscFunctionBegin; 2024 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2025 if (call == MAT_REUSE_MATRIX) { 2026 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2027 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2028 } else { 2029 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2030 } 2031 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2032 if (call == MAT_INITIAL_MATRIX) { 2033 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2034 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2035 } 2036 PetscFunctionReturn(0); 2037 } 2038 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2039 #undef __FUNCT__ 2040 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2041 /* 2042 Not great since it makes two copies of the submatrix, 
first an SeqBAIJ 2043 in local and then by concatenating the local matrices the end result. 2044 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2045 */ 2046 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2047 { 2048 PetscErrorCode ierr; 2049 PetscMPIInt rank,size; 2050 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2051 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2052 Mat M,Mreuse; 2053 MatScalar *vwork,*aa; 2054 MPI_Comm comm; 2055 IS isrow_new, iscol_new; 2056 PetscBool idflag,allrows, allcols; 2057 Mat_SeqBAIJ *aij; 2058 2059 PetscFunctionBegin; 2060 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2061 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2062 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2063 /* The compression and expansion should be avoided. Doesn't point 2064 out errors, might change the indices, hence buggey */ 2065 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2066 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2067 2068 /* Check for special case: each processor gets entire matrix columns */ 2069 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2070 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2071 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 2072 else allcols = PETSC_FALSE; 2073 2074 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2075 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2076 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 2077 else allrows = PETSC_FALSE; 2078 2079 if (call == MAT_REUSE_MATRIX) { 2080 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2081 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2082 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2083 } else { 2084 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2085 } 2086 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2087 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2088 /* 2089 m - number of local rows 2090 n - number of columns (same on all processors) 2091 rstart - first row in new global matrix generated 2092 */ 2093 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2094 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2095 m = m/bs; 2096 n = n/bs; 2097 2098 if (call == MAT_INITIAL_MATRIX) { 2099 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2100 ii = aij->i; 2101 jj = aij->j; 2102 2103 /* 2104 Determine the number of non-zeros in the diagonal and off-diagonal 2105 portions of the matrix in order to do correct preallocation 2106 */ 2107 2108 /* first get start and end of "diagonal" columns */ 2109 if (csize == PETSC_DECIDE) { 2110 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2111 if (mglobal == n*bs) { /* square matrix */ 2112 nlocal = m; 2113 } else { 2114 nlocal = n/size + ((n % size) > rank); 2115 } 2116 } else { 2117 nlocal = csize/bs; 2118 } 2119 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2120 rstart = rend - nlocal; 2121 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2122 2123 /* next, compute all the lengths */ 2124 
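  /*
     dlens[i] and olens[i] count the block entries of local block row i that fall
     inside and outside the new "diagonal" block-column range [rstart,rend);
     they provide the preallocation for the BAIJ matrix created below.
  */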
ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2125 olens = dlens + m; 2126 for (i=0; i<m; i++) { 2127 jend = ii[i+1] - ii[i]; 2128 olen = 0; 2129 dlen = 0; 2130 for (j=0; j<jend; j++) { 2131 if (*jj < rstart || *jj >= rend) olen++; 2132 else dlen++; 2133 jj++; 2134 } 2135 olens[i] = olen; 2136 dlens[i] = dlen; 2137 } 2138 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2139 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2140 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2141 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2142 ierr = PetscFree(dlens);CHKERRQ(ierr); 2143 } else { 2144 PetscInt ml,nl; 2145 2146 M = *newmat; 2147 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2148 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2149 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2150 /* 2151 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2152 rather than the slower MatSetValues(). 2153 */ 2154 M->was_assembled = PETSC_TRUE; 2155 M->assembled = PETSC_FALSE; 2156 } 2157 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2158 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2159 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2160 ii = aij->i; 2161 jj = aij->j; 2162 aa = aij->a; 2163 for (i=0; i<m; i++) { 2164 row = rstart/bs + i; 2165 nz = ii[i+1] - ii[i]; 2166 cwork = jj; jj += nz; 2167 vwork = aa; aa += nz*bs*bs; 2168 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2169 } 2170 2171 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2172 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2173 *newmat = M; 2174 2175 /* save submatrix used in processor for next request */ 2176 if (call == MAT_INITIAL_MATRIX) { 2177 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2178 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2179 } 2180 PetscFunctionReturn(0); 2181 } 2182 2183 #undef __FUNCT__ 2184 #define __FUNCT__ "MatPermute_MPIBAIJ" 2185 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2186 { 2187 MPI_Comm comm,pcomm; 2188 PetscInt first,rlocal_size,clocal_size,nrows; 2189 const PetscInt *rows; 2190 PetscMPIInt size; 2191 IS crowp,growp,irowp,lrowp,lcolp; 2192 PetscErrorCode ierr; 2193 2194 PetscFunctionBegin; 2195 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2196 /* make a collective version of 'rowp' */ 2197 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2198 if (pcomm==comm) { 2199 crowp = rowp; 2200 } else { 2201 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2202 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2203 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2204 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2205 } 2206 /* collect the global row permutation and invert it */ 2207 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2208 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2209 if (pcomm!=comm) { 2210 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2211 } 2212 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2213 ierr = ISDestroy(&growp);CHKERRQ(ierr); 2214 /* get the local target indices */ 2215 ierr = MatGetOwnershipRange(A,&first,NULL);CHKERRQ(ierr); 2216 ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr); 2217 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2218 ierr = 
ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2219 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2220 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2221 /* the column permutation is so much easier; 2222 make a local version of 'colp' and invert it */ 2223 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2224 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2225 if (size==1) { 2226 lcolp = colp; 2227 } else { 2228 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2229 } 2230 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2231 /* now we just get the submatrix */ 2232 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2233 if (size>1) { 2234 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2235 } 2236 /* clean up */ 2237 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2238 PetscFunctionReturn(0); 2239 } 2240 2241 #undef __FUNCT__ 2242 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2243 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2244 { 2245 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2246 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2247 2248 PetscFunctionBegin; 2249 if (nghosts) *nghosts = B->nbs; 2250 if (ghosts) *ghosts = baij->garray; 2251 PetscFunctionReturn(0); 2252 } 2253 2254 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat); 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2258 /* 2259 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2260 */ 2261 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2262 { 2263 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2264 PetscErrorCode ierr; 2265 PetscMPIInt size,*ncolsonproc,*disp,nn; 2266 PetscInt bs,i,n,nrows,j,k,m,ncols,col; 2267 const PetscInt *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj,*ltog; 2268 PetscInt nis = iscoloring->n,nctot,*cols; 2269 PetscInt *rowhit,M,cstart,cend,colb; 2270 PetscInt *columnsforrow,l; 2271 IS *isa; 2272 PetscBool done,flg; 2273 ISLocalToGlobalMapping map = mat->cmap->bmapping; 2274 PetscInt ctype=c->ctype; 2275 2276 PetscFunctionBegin; 2277 if (!mat->assembled) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2278 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2279 2280 if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,<og);CHKERRQ(ierr);} 2281 else ltog = NULL; 2282 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2283 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2284 2285 M = mat->rmap->n/bs; 2286 cstart = mat->cmap->rstart/bs; 2287 cend = mat->cmap->rend/bs; 2288 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2289 c->N = mat->cmap->N/bs; 2290 c->m = mat->rmap->n/bs; 2291 c->rstart = mat->rmap->rstart/bs; 2292 2293 c->ncolors = nis; 2294 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2295 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2296 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2297 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2298 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2299 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2300 2301 /* 
Allow access to data structures of local part of matrix */ 2302 if (!baij->colmap) { 2303 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2304 } 2305 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2306 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2307 2308 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2309 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2310 2311 for (i=0; i<nis; i++) { 2312 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2313 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2314 2315 c->ncolumns[i] = n; 2316 if (n) { 2317 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2318 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2319 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2320 } else { 2321 c->columns[i] = 0; 2322 } 2323 2324 if (ctype == IS_COLORING_GLOBAL) { 2325 /* Determine the total (parallel) number of columns of this color */ 2326 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 2327 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2328 2329 ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr); 2330 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2331 nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j]; 2332 if (!nctot) { 2333 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2334 } 2335 2336 disp[0] = 0; 2337 for (j=1; j<size; j++) { 2338 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2339 } 2340 2341 /* Get complete list of columns for color on each processor */ 2342 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2343 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2344 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2345 } else if (ctype == IS_COLORING_GHOSTED) { 2346 /* Determine local number of columns of this color on this process, including ghost points */ 2347 nctot = n; 2348 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2349 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2350 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2351 2352 /* 2353 Mark all rows affect by these columns 2354 */ 2355 /* Temporary option to allow for debugging/testing */ 2356 flg = PETSC_FALSE; 2357 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 2358 if (!flg) { /*-----------------------------------------------------------------------------*/ 2359 /* crude, fast version */ 2360 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2361 /* loop over columns*/ 2362 for (j=0; j<nctot; j++) { 2363 if (ctype == IS_COLORING_GHOSTED) { 2364 col = ltog[cols[j]]; 2365 } else { 2366 col = cols[j]; 2367 } 2368 if (col >= cstart && col < cend) { 2369 /* column is in diagonal block of matrix */ 2370 rows = A_cj + A_ci[col-cstart]; 2371 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2372 } else { 2373 #if defined(PETSC_USE_CTABLE) 2374 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2375 colb--; 2376 #else 2377 colb = baij->colmap[col] - 1; 2378 #endif 2379 if (colb == -1) { 2380 m = 0; 2381 } else { 2382 colb = colb/bs; 2383 rows = B_cj + B_ci[colb]; 2384 m = B_ci[colb+1] - 
B_ci[colb]; 2385 } 2386 } 2387 /* loop over columns marking them in rowhit */ 2388 for (k=0; k<m; k++) { 2389 rowhit[*rows++] = col + 1; 2390 } 2391 } 2392 2393 /* count the number of hits */ 2394 nrows = 0; 2395 for (j=0; j<M; j++) { 2396 if (rowhit[j]) nrows++; 2397 } 2398 c->nrows[i] = nrows; 2399 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2400 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2401 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2402 nrows = 0; 2403 for (j=0; j<M; j++) { 2404 if (rowhit[j]) { 2405 c->rows[i][nrows] = j; 2406 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2407 nrows++; 2408 } 2409 } 2410 } else { /*-------------------------------------------------------------------------------*/ 2411 /* slow version, using rowhit as a linked list */ 2412 PetscInt currentcol,fm,mfm; 2413 rowhit[M] = M; 2414 nrows = 0; 2415 /* loop over columns*/ 2416 for (j=0; j<nctot; j++) { 2417 if (ctype == IS_COLORING_GHOSTED) { 2418 col = ltog[cols[j]]; 2419 } else { 2420 col = cols[j]; 2421 } 2422 if (col >= cstart && col < cend) { 2423 /* column is in diagonal block of matrix */ 2424 rows = A_cj + A_ci[col-cstart]; 2425 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2426 } else { 2427 #if defined(PETSC_USE_CTABLE) 2428 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2429 colb--; 2430 #else 2431 colb = baij->colmap[col] - 1; 2432 #endif 2433 if (colb == -1) { 2434 m = 0; 2435 } else { 2436 colb = colb/bs; 2437 rows = B_cj + B_ci[colb]; 2438 m = B_ci[colb+1] - B_ci[colb]; 2439 } 2440 } 2441 2442 /* loop over columns marking them in rowhit */ 2443 fm = M; /* fm points to first entry in linked list */ 2444 for (k=0; k<m; k++) { 2445 currentcol = *rows++; 2446 /* is it already in the list? 
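            rowhit[] doubles as a sorted singly linked list of rows already hit,
            terminated by the sentinel rowhit[M] = M; walk it until an entry
            reaches currentcol and splice currentcol in if it is absent (finding
            it already present means two columns of one color share a row, an
            invalid coloring).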
*/ 2447 do { 2448 mfm = fm; 2449 fm = rowhit[fm]; 2450 } while (fm < currentcol); 2451 /* not in list so add it */ 2452 if (fm != currentcol) { 2453 nrows++; 2454 columnsforrow[currentcol] = col; 2455 /* next three lines insert new entry into linked list */ 2456 rowhit[mfm] = currentcol; 2457 rowhit[currentcol] = fm; 2458 fm = currentcol; 2459 /* fm points to present position in list since we know the columns are sorted */ 2460 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2461 } 2462 } 2463 c->nrows[i] = nrows; 2464 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2465 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2466 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2467 /* now store the linked list of rows into c->rows[i] */ 2468 nrows = 0; 2469 fm = rowhit[M]; 2470 do { 2471 c->rows[i][nrows] = fm; 2472 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2473 fm = rowhit[fm]; 2474 } while (fm < M); 2475 } /* ---------------------------------------------------------------------------------------*/ 2476 ierr = PetscFree(cols);CHKERRQ(ierr); 2477 } 2478 2479 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2480 /* 2481 vscale will contain the "diagonal" on processor scalings followed by the off processor 2482 */ 2483 if (ctype == IS_COLORING_GLOBAL) { 2484 PetscInt *garray; 2485 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2486 for (i=0; i<baij->B->cmap->n/bs; i++) { 2487 for (j=0; j<bs; j++) { 2488 garray[i*bs+j] = bs*baij->garray[i]+j; 2489 } 2490 } 2491 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2492 ierr = PetscFree(garray);CHKERRQ(ierr); 2493 CHKMEMQ; 2494 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2495 for (k=0; k<c->ncolors; k++) { 2496 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2497 for (l=0; l<c->nrows[k]; l++) { 2498 col = c->columnsforrow[k][l]; 2499 if (col >= cstart && col < cend) { 2500 /* column is in diagonal block of matrix */ 2501 colb = col - cstart; 2502 } else { 2503 /* column is in "off-processor" part */ 2504 #if defined(PETSC_USE_CTABLE) 2505 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2506 colb--; 2507 #else 2508 colb = baij->colmap[col] - 1; 2509 #endif 2510 colb = colb/bs; 2511 colb += cend - cstart; 2512 } 2513 c->vscaleforrow[k][l] = colb; 2514 } 2515 } 2516 } else if (ctype == IS_COLORING_GHOSTED) { 2517 /* Get gtol mapping */ 2518 PetscInt N = mat->cmap->N,nlocal,*gtol; 2519 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2520 for (i=0; i<N; i++) gtol[i] = -1; 2521 ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr); 2522 for (i=0; i<nlocal; i++) gtol[ltog[i]] = i; 2523 2524 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2525 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2526 for (k=0; k<c->ncolors; k++) { 2527 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2528 for (l=0; l<c->nrows[k]; l++) { 2529 col = c->columnsforrow[k][l]; /* global column index */ 2530 2531 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2532 } 2533 } 2534 ierr = PetscFree(gtol);CHKERRQ(ierr); 2535 } 2536 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2537 2538 ierr = 
PetscFree(rowhit);CHKERRQ(ierr); 2539 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2540 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2541 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2542 if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,<og);CHKERRQ(ierr);} 2543 CHKMEMQ; 2544 PetscFunctionReturn(0); 2545 } 2546 2547 #undef __FUNCT__ 2548 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2549 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2550 { 2551 Mat B; 2552 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2553 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2554 Mat_SeqAIJ *b; 2555 PetscErrorCode ierr; 2556 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2557 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2558 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2559 2560 PetscFunctionBegin; 2561 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2562 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2563 2564 /* ---------------------------------------------------------------- 2565 Tell every processor the number of nonzeros per row 2566 */ 2567 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2568 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2569 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2570 } 2571 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2572 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2573 displs = recvcounts + size; 2574 for (i=0; i<size; i++) { 2575 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2576 displs[i] = A->rmap->range[i]/bs; 2577 } 2578 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2579 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2580 #else 2581 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2582 #endif 2583 /* --------------------------------------------------------------- 2584 Create the sequential matrix of the same type as the local block diagonal 2585 */ 2586 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2587 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2588 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2589 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2590 b = (Mat_SeqAIJ*)B->data; 2591 2592 /*-------------------------------------------------------------------- 2593 Copy my part of matrix column indices over 2594 */ 2595 sendcount = ad->nz + bd->nz; 2596 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2597 a_jsendbuf = ad->j; 2598 b_jsendbuf = bd->j; 2599 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2600 cnt = 0; 2601 for (i=0; i<n; i++) { 2602 2603 /* put in lower diagonal portion */ 2604 m = bd->i[i+1] - bd->i[i]; 2605 while (m > 0) { 2606 /* is it above diagonal (in bd (compressed) numbering) */ 2607 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2608 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2609 m--; 2610 } 2611 2612 /* put in diagonal portion */ 2613 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2614 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2615 } 2616 2617 /* put in 
upper diagonal portion */ 2618 while (m-- > 0) { 2619 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2620 } 2621 } 2622 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2623 2624 /*-------------------------------------------------------------------- 2625 Gather all column indices to all processors 2626 */ 2627 for (i=0; i<size; i++) { 2628 recvcounts[i] = 0; 2629 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2630 recvcounts[i] += lens[j]; 2631 } 2632 } 2633 displs[0] = 0; 2634 for (i=1; i<size; i++) { 2635 displs[i] = displs[i-1] + recvcounts[i-1]; 2636 } 2637 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2638 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2639 #else 2640 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2641 #endif 2642 /*-------------------------------------------------------------------- 2643 Assemble the matrix into useable form (note numerical values not yet set) 2644 */ 2645 /* set the b->ilen (length of each row) values */ 2646 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2647 /* set the b->i indices */ 2648 b->i[0] = 0; 2649 for (i=1; i<=A->rmap->N/bs; i++) { 2650 b->i[i] = b->i[i-1] + lens[i-1]; 2651 } 2652 ierr = PetscFree(lens);CHKERRQ(ierr); 2653 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2654 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2655 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2656 2657 if (A->symmetric) { 2658 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2659 } else if (A->hermitian) { 2660 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2661 } else if (A->structurally_symmetric) { 2662 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2663 } 2664 *newmat = B; 2665 PetscFunctionReturn(0); 2666 } 2667 2668 #undef __FUNCT__ 2669 #define __FUNCT__ "MatSOR_MPIBAIJ" 2670 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2671 { 2672 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2673 PetscErrorCode ierr; 2674 Vec bb1 = 0; 2675 2676 PetscFunctionBegin; 2677 if (flag == SOR_APPLY_UPPER) { 2678 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2679 PetscFunctionReturn(0); 2680 } 2681 2682 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2683 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2684 } 2685 2686 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2687 if (flag & SOR_ZERO_INITIAL_GUESS) { 2688 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2689 its--; 2690 } 2691 2692 while (its--) { 2693 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2694 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2695 2696 /* update rhs: bb1 = bb - B*x */ 2697 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2698 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2699 2700 /* local sweep */ 2701 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2702 } 2703 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2704 if (flag & SOR_ZERO_INITIAL_GUESS) { 2705 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2706 its--; 2707 } 2708 while (its--) { 2709 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2710 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2711 2712 /* update rhs: bb1 = bb - B*x */ 2713 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2714 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2715 2716 /* local sweep */ 2717 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2718 } 2719 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2720 if (flag & SOR_ZERO_INITIAL_GUESS) { 2721 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2722 its--; 2723 } 2724 while (its--) { 2725 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2726 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2727 2728 /* update rhs: bb1 = bb - B*x */ 2729 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2730 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2731 2732 /* local sweep */ 2733 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2734 } 2735 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2736 2737 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2738 PetscFunctionReturn(0); 2739 } 2740 2741 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2742 2743 #undef __FUNCT__ 2744 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2745 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2746 { 2747 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2748 PetscErrorCode ierr; 2749 2750 PetscFunctionBegin; 2751 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2752 PetscFunctionReturn(0); 2753 } 2754 2755 2756 /* -------------------------------------------------------------------*/ 2757 static struct _MatOps MatOps_Values = { 2758 MatSetValues_MPIBAIJ, 2759 MatGetRow_MPIBAIJ, 2760 MatRestoreRow_MPIBAIJ, 2761 MatMult_MPIBAIJ, 2762 /* 4*/ MatMultAdd_MPIBAIJ, 2763 MatMultTranspose_MPIBAIJ, 2764 MatMultTransposeAdd_MPIBAIJ, 2765 0, 2766 0, 2767 0, 2768 /*10*/ 0, 2769 0, 2770 0, 2771 MatSOR_MPIBAIJ, 2772 MatTranspose_MPIBAIJ, 2773 /*15*/ MatGetInfo_MPIBAIJ, 2774 MatEqual_MPIBAIJ, 2775 MatGetDiagonal_MPIBAIJ, 2776 MatDiagonalScale_MPIBAIJ, 2777 MatNorm_MPIBAIJ, 2778 /*20*/ MatAssemblyBegin_MPIBAIJ, 2779 MatAssemblyEnd_MPIBAIJ, 2780 MatSetOption_MPIBAIJ, 2781 MatZeroEntries_MPIBAIJ, 2782 /*24*/ MatZeroRows_MPIBAIJ, 2783 0, 2784 0, 2785 0, 2786 0, 2787 /*29*/ MatSetUp_MPIBAIJ, 2788 0, 2789 0, 2790 0, 2791 0, 2792 /*34*/ MatDuplicate_MPIBAIJ, 2793 0, 2794 0, 2795 0, 2796 0, 2797 /*39*/ MatAXPY_MPIBAIJ, 2798 MatGetSubMatrices_MPIBAIJ, 2799 MatIncreaseOverlap_MPIBAIJ, 2800 MatGetValues_MPIBAIJ, 2801 MatCopy_MPIBAIJ, 2802 /*44*/ 0, 2803 MatScale_MPIBAIJ, 2804 0, 2805 0, 2806 0, 2807 /*49*/ 0, 2808 0, 2809 0, 2810 0, 2811 0, 2812 /*54*/ MatFDColoringCreate_MPIBAIJ, 2813 0, 2814 MatSetUnfactored_MPIBAIJ, 2815 MatPermute_MPIBAIJ, 2816 MatSetValuesBlocked_MPIBAIJ, 2817 /*59*/ MatGetSubMatrix_MPIBAIJ, 2818 MatDestroy_MPIBAIJ, 2819 MatView_MPIBAIJ, 2820 0, 2821 0, 2822 /*64*/ 0, 2823 0, 2824 0, 2825 0, 2826 0, 2827 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2828 0, 2829 0, 2830 0, 2831 0, 2832 /*74*/ 0, 2833 
MatFDColoringApply_BAIJ, 2834 0, 2835 0, 2836 0, 2837 /*79*/ 0, 2838 0, 2839 0, 2840 0, 2841 MatLoad_MPIBAIJ, 2842 /*84*/ 0, 2843 0, 2844 0, 2845 0, 2846 0, 2847 /*89*/ 0, 2848 0, 2849 0, 2850 0, 2851 0, 2852 /*94*/ 0, 2853 0, 2854 0, 2855 0, 2856 0, 2857 /*99*/ 0, 2858 0, 2859 0, 2860 0, 2861 0, 2862 /*104*/0, 2863 MatRealPart_MPIBAIJ, 2864 MatImaginaryPart_MPIBAIJ, 2865 0, 2866 0, 2867 /*109*/0, 2868 0, 2869 0, 2870 0, 2871 0, 2872 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2873 0, 2874 MatGetGhosts_MPIBAIJ, 2875 0, 2876 0, 2877 /*119*/0, 2878 0, 2879 0, 2880 0, 2881 0, 2882 /*124*/0, 2883 0, 2884 MatInvertBlockDiagonal_MPIBAIJ 2885 }; 2886 2887 EXTERN_C_BEGIN 2888 #undef __FUNCT__ 2889 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2890 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2891 { 2892 PetscFunctionBegin; 2893 *a = ((Mat_MPIBAIJ*)A->data)->A; 2894 PetscFunctionReturn(0); 2895 } 2896 EXTERN_C_END 2897 2898 EXTERN_C_BEGIN 2899 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2900 EXTERN_C_END 2901 2902 EXTERN_C_BEGIN 2903 #undef __FUNCT__ 2904 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2905 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2906 { 2907 PetscInt m,rstart,cstart,cend; 2908 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2909 const PetscInt *JJ =0; 2910 PetscScalar *values=0; 2911 PetscErrorCode ierr; 2912 2913 PetscFunctionBegin; 2914 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2915 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2916 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2917 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2918 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2919 m = B->rmap->n/bs; 2920 rstart = B->rmap->rstart/bs; 2921 cstart = B->cmap->rstart/bs; 2922 cend = B->cmap->rend/bs; 2923 2924 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2925 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2926 for (i=0; i<m; i++) { 2927 nz = ii[i+1] - ii[i]; 2928 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2929 nz_max = PetscMax(nz_max,nz); 2930 JJ = jj + ii[i]; 2931 for (j=0; j<nz; j++) { 2932 if (*JJ >= cstart) break; 2933 JJ++; 2934 } 2935 d = 0; 2936 for (; j<nz; j++) { 2937 if (*JJ++ >= cend) break; 2938 d++; 2939 } 2940 d_nnz[i] = d; 2941 o_nnz[i] = nz - d; 2942 } 2943 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2944 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2945 2946 values = (PetscScalar*)V; 2947 if (!values) { 2948 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2949 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2950 } 2951 for (i=0; i<m; i++) { 2952 PetscInt row = i + rstart; 2953 PetscInt ncols = ii[i+1] - ii[i]; 2954 const PetscInt *icols = jj + ii[i]; 2955 const PetscScalar *svals = values + (V ? 
(bs*bs*ii[i]) : 0); 2956 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2957 } 2958 2959 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2960 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2961 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2962 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2963 PetscFunctionReturn(0); 2964 } 2965 EXTERN_C_END 2966 2967 #undef __FUNCT__ 2968 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2969 /*@C 2970 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2971 (the default parallel PETSc format). 2972 2973 Collective on MPI_Comm 2974 2975 Input Parameters: 2976 + A - the matrix 2977 . bs - the block size 2978 . i - the indices into j for the start of each local row (starts with zero) 2979 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2980 - v - optional values in the matrix 2981 2982 Level: developer 2983 2984 .keywords: matrix, aij, compressed row, sparse, parallel 2985 2986 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2987 @*/ 2988 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2989 { 2990 PetscErrorCode ierr; 2991 2992 PetscFunctionBegin; 2993 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2994 PetscValidType(B,1); 2995 PetscValidLogicalCollectiveInt(B,bs,2); 2996 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2997 PetscFunctionReturn(0); 2998 } 2999 3000 EXTERN_C_BEGIN 3001 #undef __FUNCT__ 3002 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 3003 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 3004 { 3005 Mat_MPIBAIJ *b; 3006 PetscErrorCode ierr; 3007 PetscInt i; 3008 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3009 3010 PetscFunctionBegin; 3011 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3012 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3013 3014 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3015 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3016 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3017 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3018 3019 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3020 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3021 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3022 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3023 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3024 3025 if (d_nnz) { 3026 for (i=0; i<B->rmap->n/bs; i++) { 3027 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3028 } 3029 } 3030 if (o_nnz) { 3031 for (i=0; i<B->rmap->n/bs; i++) { 3032 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3033 } 3034 } 3035 3036 b = (Mat_MPIBAIJ*)B->data; 3037 b->bs2 = bs*bs; 3038 b->mbs = B->rmap->n/bs; 3039 b->nbs = B->cmap->n/bs; 3040 b->Mbs = B->rmap->N/bs; 3041 b->Nbs = B->cmap->N/bs; 3042 3043 for 
(i=0; i<=b->size; i++) { 3044 b->rangebs[i] = B->rmap->range[i]/bs; 3045 } 3046 b->rstartbs = B->rmap->rstart/bs; 3047 b->rendbs = B->rmap->rend/bs; 3048 b->cstartbs = B->cmap->rstart/bs; 3049 b->cendbs = B->cmap->rend/bs; 3050 3051 if (!B->preallocated) { 3052 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3053 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3054 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3055 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3056 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3057 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3058 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3059 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3060 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 3061 } 3062 3063 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3064 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3065 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3066 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3067 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3068 B->preallocated = PETSC_TRUE; 3069 PetscFunctionReturn(0); 3070 } 3071 EXTERN_C_END 3072 3073 EXTERN_C_BEGIN 3074 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3075 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3076 EXTERN_C_END 3077 3078 3079 EXTERN_C_BEGIN 3080 #undef __FUNCT__ 3081 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3082 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 3083 { 3084 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3085 PetscErrorCode ierr; 3086 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3087 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3088 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3089 3090 PetscFunctionBegin; 3091 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3092 ii[0] = 0; 3093 CHKMEMQ; 3094 for (i=0; i<M; i++) { 3095 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3096 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3097 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3098 /* remove one from count of matrix has diagonal */ 3099 for (j=id[i]; j<id[i+1]; j++) { 3100 if (jd[j] == i) {ii[i+1]--;break;} 3101 } 3102 CHKMEMQ; 3103 } 3104 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3105 cnt = 0; 3106 for (i=0; i<M; i++) { 3107 for (j=io[i]; j<io[i+1]; j++) { 3108 if (garray[jo[j]] > rstart) break; 3109 jj[cnt++] = garray[jo[j]]; 3110 CHKMEMQ; 3111 } 3112 for (k=id[i]; k<id[i+1]; k++) { 3113 if (jd[k] != i) { 3114 jj[cnt++] = rstart + jd[k]; 3115 CHKMEMQ; 3116 } 3117 } 3118 for (; j<io[i+1]; j++) { 3119 jj[cnt++] = garray[jo[j]]; 3120 CHKMEMQ; 3121 } 3122 } 3123 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 3124 PetscFunctionReturn(0); 3125 } 3126 EXTERN_C_END 3127 3128 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3129 EXTERN_C_BEGIN 3130 PetscErrorCode 
MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 3131 EXTERN_C_END 3132 3133 EXTERN_C_BEGIN 3134 #undef __FUNCT__ 3135 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3136 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 3137 { 3138 PetscErrorCode ierr; 3139 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3140 Mat B; 3141 Mat_MPIAIJ *b; 3142 3143 PetscFunctionBegin; 3144 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 3145 3146 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3147 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3148 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3149 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3150 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 3151 b = (Mat_MPIAIJ*) B->data; 3152 3153 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3154 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3155 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3156 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3157 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3158 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3159 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3160 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3161 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3162 if (reuse == MAT_REUSE_MATRIX) { 3163 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3164 } else { 3165 *newmat = B; 3166 } 3167 PetscFunctionReturn(0); 3168 } 3169 EXTERN_C_END 3170 3171 EXTERN_C_BEGIN 3172 #if defined(PETSC_HAVE_MUMPS) 3173 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3174 #endif 3175 EXTERN_C_END 3176 3177 /*MC 3178 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3179 3180 Options Database Keys: 3181 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3182 . 
-mat_block_size <bs> - set the blocksize used to store the matrix 3183 - -mat_use_hash_table <fact> 3184 3185 Level: beginner 3186 3187 .seealso: MatCreateMPIBAIJ 3188 M*/ 3189 3190 EXTERN_C_BEGIN 3191 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3192 EXTERN_C_END 3193 3194 EXTERN_C_BEGIN 3195 #undef __FUNCT__ 3196 #define __FUNCT__ "MatCreate_MPIBAIJ" 3197 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3198 { 3199 Mat_MPIBAIJ *b; 3200 PetscErrorCode ierr; 3201 PetscBool flg; 3202 3203 PetscFunctionBegin; 3204 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3205 B->data = (void*)b; 3206 3207 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3208 B->assembled = PETSC_FALSE; 3209 3210 B->insertmode = NOT_SET_VALUES; 3211 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3212 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3213 3214 /* build local table of row and column ownerships */ 3215 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3216 3217 /* build cache for off array entries formed */ 3218 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3219 3220 b->donotstash = PETSC_FALSE; 3221 b->colmap = NULL; 3222 b->garray = NULL; 3223 b->roworiented = PETSC_TRUE; 3224 3225 /* stuff used in block assembly */ 3226 b->barray = 0; 3227 3228 /* stuff used for matrix vector multiply */ 3229 b->lvec = 0; 3230 b->Mvctx = 0; 3231 3232 /* stuff for MatGetRow() */ 3233 b->rowindices = 0; 3234 b->rowvalues = 0; 3235 b->getrowactive = PETSC_FALSE; 3236 3237 /* hash table stuff */ 3238 b->ht = 0; 3239 b->hd = 0; 3240 b->ht_size = 0; 3241 b->ht_flag = PETSC_FALSE; 3242 b->ht_fact = 0; 3243 b->ht_total_ct = 0; 3244 b->ht_insert_ct = 0; 3245 3246 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3247 b->ijonly = PETSC_FALSE; 3248 3249 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3250 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 3251 if (flg) { 3252 PetscReal fact = 1.39; 3253 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3254 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3255 if (fact <= 1.0) fact = 1.39; 3256 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3257 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3258 } 3259 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3260 3261 #if defined(PETSC_HAVE_MUMPS) 3262 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3263 #endif 3264 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3265 "MatConvert_MPIBAIJ_MPIAdj", 3266 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3267 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3268 "MatConvert_MPIBAIJ_MPIAIJ", 3269 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3270 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C", 3271 "MatConvert_MPIBAIJ_MPISBAIJ", 3272 MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3273 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3274 "MatStoreValues_MPIBAIJ", 3275 
MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3276 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3277 "MatRetrieveValues_MPIBAIJ", 3278 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3279 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3280 "MatGetDiagonalBlock_MPIBAIJ", 3281 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3282 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3283 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3284 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3285 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3286 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3287 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3288 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3289 "MatDiagonalScaleLocal_MPIBAIJ", 3290 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3291 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3292 "MatSetHashTableFactor_MPIBAIJ", 3293 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3294 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C", 3295 "MatConvert_MPIBAIJ_MPIBSTRM", 3296 MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3297 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3298 PetscFunctionReturn(0); 3299 } 3300 EXTERN_C_END 3301 3302 /*MC 3303 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3304 3305 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3306 and MATMPIBAIJ otherwise. 3307 3308 Options Database Keys: 3309 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3310 3311 Level: beginner 3312 3313 .seealso: MatCreateBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3314 M*/ 3315 3316 #undef __FUNCT__ 3317 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3318 /*@C 3319 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3320 (block compressed row). For good matrix assembly performance 3321 the user should preallocate the matrix storage by setting the parameters 3322 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3323 performance can be increased by more than a factor of 50. 3324 3325 Collective on Mat 3326 3327 Input Parameters: 3328 + B - the matrix 3329 . bs - size of block 3330 . d_nz - number of block nonzeros per block row in the diagonal portion of the local 3331 submatrix (same for all local rows) 3332 . d_nnz - array containing the number of block nonzeros in the various block rows 3333 in the diagonal portion of the local submatrix (possibly different for each block 3334 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3335 set it even if it is zero. 3336 . o_nz - number of block nonzeros per block row in the off-diagonal portion of the local 3337 submatrix (same for all local rows). 3338 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3339 off-diagonal portion of the local submatrix (possibly different for 3340 each block row) or NULL.
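   Example usage (an illustrative sketch only; the communicator comm, the sizes m, n, M, N, and the
   per-block-row estimates are placeholders, not values prescribed by this routine):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,2,5,NULL,2,NULL);  /* bs=2; about 5 diagonal and 2 off-diagonal block nonzeros per block row */
.ve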
3341 3342 If the *_nnz parameter is given then the *_nz parameter is ignored. 3343 3344 Options Database Keys: 3345 + -mat_block_size - size of the blocks to use 3346 - -mat_use_hash_table <fact> - use a hash table during matrix assembly, with <fact> as the hash table size factor 3347 3348 Notes: 3349 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3350 then it must be used on all processors that share the object for that argument. 3351 3352 Storage Information: 3353 For a square global matrix we define each processor's diagonal portion 3354 to be its local rows and the corresponding columns (a square submatrix); 3355 each processor's off-diagonal portion encompasses the remainder of the 3356 local matrix (a rectangular submatrix). 3357 3358 The user can specify preallocated storage for the diagonal part of 3359 the local submatrix with either d_nz or d_nnz (not both). Set 3360 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3361 memory allocation. Likewise, specify preallocated storage for the 3362 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3363 3364 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3365 the figure below we depict these three local rows and all columns (0-11). 3366 3367 .vb 3368 0 1 2 3 4 5 6 7 8 9 10 11 3369 ------------------- 3370 row 3 | o o o d d d o o o o o o 3371 row 4 | o o o d d d o o o o o o 3372 row 5 | o o o d d d o o o o o o 3373 ------------------- 3374 .ve 3375 3376 Thus, any entries in the d locations are stored in the d (diagonal) 3377 submatrix, and any entries in the o locations are stored in the 3378 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3379 stored simply in the MATSEQBAIJ format for compressed row storage. 3380 3381 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3382 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3383 In general, for PDE problems in which most nonzeros are near the diagonal, 3384 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3385 or you will get TERRIBLE performance; see the users' manual chapter on 3386 matrices. 3387 3388 You can call MatGetInfo() to get information on how effective the preallocation was; 3389 for example, check the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3390 You can also run with the option -info and look for messages with the string 3391 malloc in them to see if additional memory allocation was needed. 3392 3393 Level: intermediate 3394 3395 .keywords: matrix, block, aij, compressed row, sparse, parallel 3396 3397 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3398 @*/ 3399 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3400 { 3401 PetscErrorCode ierr; 3402 3403 PetscFunctionBegin; 3404 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3405 PetscValidType(B,1); 3406 PetscValidLogicalCollectiveInt(B,bs,2); 3407 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3408 PetscFunctionReturn(0); 3409 } 3410 3411 #undef __FUNCT__ 3412 #define __FUNCT__ "MatCreateBAIJ" 3413 /*@C 3414 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3415 (block compressed row).
For good matrix assembly performance 3416 the user should preallocate the matrix storage by setting the parameters 3417 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3418 performance can be increased by more than a factor of 50. 3419 3420 Collective on MPI_Comm 3421 3422 Input Parameters: 3423 + comm - MPI communicator 3424 . bs - size of block 3425 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 3426 This value should be the same as the local size used in creating the 3427 y vector for the matrix-vector product y = Ax. 3428 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given) 3429 This value should be the same as the local size used in creating the 3430 x vector for the matrix-vector product y = Ax. 3431 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3432 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3433 . d_nz - number of nonzero blocks per block row in the diagonal portion of the local 3434 submatrix (same for all local rows) 3435 . d_nnz - array containing the number of nonzero blocks in the various block rows 3436 in the diagonal portion of the local submatrix (possibly different for each block 3437 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3438 and set it even if it is zero. 3439 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of the local 3440 submatrix (same for all local rows). 3441 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3442 off-diagonal portion of the local submatrix (possibly different for 3443 each block row) or NULL. 3444 3445 Output Parameter: 3446 . A - the matrix 3447 3448 Options Database Keys: 3449 + -mat_block_size - size of the blocks to use 3450 - -mat_use_hash_table <fact> - use a hash table during matrix assembly, with <fact> as the hash table size factor 3451 3452 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3453 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3454 [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()] 3455 3456 Notes: 3457 If the *_nnz parameter is given then the *_nz parameter is ignored. 3458 3459 A nonzero block is any block that has 1 or more nonzeros in it. 3460 3461 The user MUST specify either the local or global matrix dimensions 3462 (possibly both). 3463 3464 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3465 then it must be used on all processors that share the object for that argument. 3466 3467 Storage Information: 3468 For a square global matrix we define each processor's diagonal portion 3469 to be its local rows and the corresponding columns (a square submatrix); 3470 each processor's off-diagonal portion encompasses the remainder of the 3471 local matrix (a rectangular submatrix). 3472 3473 The user can specify preallocated storage for the diagonal part of 3474 the local submatrix with either d_nz or d_nnz (not both). Set 3475 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3476 memory allocation. Likewise, specify preallocated storage for the 3477 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3478 3479 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3480 the figure below we depict these three local rows and all columns (0-11).
3481 3482 .vb 3483 0 1 2 3 4 5 6 7 8 9 10 11 3484 ------------------- 3485 row 3 | o o o d d d o o o o o o 3486 row 4 | o o o d d d o o o o o o 3487 row 5 | o o o d d d o o o o o o 3488 ------------------- 3489 .ve 3490 3491 Thus, any entries in the d locations are stored in the d (diagonal) 3492 submatrix, and any entries in the o locations are stored in the 3493 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3494 stored simply in the MATSEQBAIJ format for compressed row storage. 3495 3496 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3497 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3498 In general, for PDE problems in which most nonzeros are near the diagonal, 3499 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3500 or you will get TERRIBLE performance; see the users' manual chapter on 3501 matrices. 3502 3503 Level: intermediate 3504 3505 .keywords: matrix, block, aij, compressed row, sparse, parallel 3506 3507 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3508 @*/ 3509 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3510 { 3511 PetscErrorCode ierr; 3512 PetscMPIInt size; 3513 3514 PetscFunctionBegin; 3515 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3516 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3517 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3518 if (size > 1) { 3519 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3520 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3521 } else { 3522 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3523 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3524 } 3525 PetscFunctionReturn(0); 3526 } 3527 3528 #undef __FUNCT__ 3529 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3530 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3531 { 3532 Mat mat; 3533 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3534 PetscErrorCode ierr; 3535 PetscInt len=0; 3536 3537 PetscFunctionBegin; 3538 *newmat = 0; 3539 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3540 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3541 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3542 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3543 3544 mat->factortype = matin->factortype; 3545 mat->preallocated = PETSC_TRUE; 3546 mat->assembled = PETSC_TRUE; 3547 mat->insertmode = NOT_SET_VALUES; 3548 3549 a = (Mat_MPIBAIJ*)mat->data; 3550 mat->rmap->bs = matin->rmap->bs; 3551 a->bs2 = oldmat->bs2; 3552 a->mbs = oldmat->mbs; 3553 a->nbs = oldmat->nbs; 3554 a->Mbs = oldmat->Mbs; 3555 a->Nbs = oldmat->Nbs; 3556 3557 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3558 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3559 3560 a->size = oldmat->size; 3561 a->rank = oldmat->rank; 3562 a->donotstash = oldmat->donotstash; 3563 a->roworiented = oldmat->roworiented; 3564 a->rowindices = 0; 3565 a->rowvalues = 0; 3566 a->getrowactive = PETSC_FALSE; 3567 a->barray = 0; 3568 a->rstartbs = oldmat->rstartbs; 3569 a->rendbs = oldmat->rendbs; 3570 a->cstartbs = oldmat->cstartbs; 3571 a->cendbs = oldmat->cendbs; 3572 
3573 /* hash table stuff */ 3574 a->ht = 0; 3575 a->hd = 0; 3576 a->ht_size = 0; 3577 a->ht_flag = oldmat->ht_flag; 3578 a->ht_fact = oldmat->ht_fact; 3579 a->ht_total_ct = 0; 3580 a->ht_insert_ct = 0; 3581 3582 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3583 if (oldmat->colmap) { 3584 #if defined(PETSC_USE_CTABLE) 3585 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3586 #else 3587 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3588 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3589 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3590 #endif 3591 } else a->colmap = 0; 3592 3593 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3594 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3595 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3596 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3597 } else a->garray = 0; 3598 3599 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3600 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3601 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3602 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3603 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3604 3605 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3606 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3607 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3608 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3609 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3610 *newmat = mat; 3611 PetscFunctionReturn(0); 3612 } 3613 3614 #undef __FUNCT__ 3615 #define __FUNCT__ "MatLoad_MPIBAIJ" 3616 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3617 { 3618 PetscErrorCode ierr; 3619 int fd; 3620 PetscInt i,nz,j,rstart,rend; 3621 PetscScalar *vals,*buf; 3622 MPI_Comm comm; 3623 MPI_Status status; 3624 PetscMPIInt rank,size,maxnz; 3625 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3626 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3627 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3628 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3629 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3630 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3631 3632 PetscFunctionBegin; 3633 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3634 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3635 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3636 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3637 3638 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3639 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3640 if (!rank) { 3641 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3642 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3643 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3644 } 3645 3646 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3647 3648 ierr = 
MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3649 M = header[1]; N = header[2]; 3650 3651 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3652 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3653 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3654 3655 /* If global sizes are set, check if they are consistent with that given in the file */ 3656 if (sizesset) { 3657 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3658 } 3659 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3660 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3661 3662 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3663 3664 /* 3665 This code adds extra rows to make sure the number of rows is 3666 divisible by the blocksize 3667 */ 3668 Mbs = M/bs; 3669 extra_rows = bs - M + bs*Mbs; 3670 if (extra_rows == bs) extra_rows = 0; 3671 else Mbs++; 3672 if (extra_rows && !rank) { 3673 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3674 } 3675 3676 /* determine ownership of all rows */ 3677 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3678 mbs = Mbs/size + ((Mbs % size) > rank); 3679 m = mbs*bs; 3680 } else { /* User set */ 3681 m = newmat->rmap->n; 3682 mbs = m/bs; 3683 } 3684 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3685 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3686 3687 /* process 0 needs enough room for process with most rows */ 3688 if (!rank) { 3689 mmax = rowners[1]; 3690 for (i=2; i<=size; i++) { 3691 mmax = PetscMax(mmax,rowners[i]); 3692 } 3693 mmax*=bs; 3694 } else mmax = m; 3695 3696 rowners[0] = 0; 3697 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3698 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3699 rstart = rowners[rank]; 3700 rend = rowners[rank+1]; 3701 3702 /* distribute row lengths to all processors */ 3703 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3704 if (!rank) { 3705 mend = m; 3706 if (size == 1) mend = mend - extra_rows; 3707 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3708 for (j=mend; j<m; j++) locrowlens[j] = 1; 3709 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3710 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3711 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3712 for (j=0; j<m; j++) { 3713 procsnz[0] += locrowlens[j]; 3714 } 3715 for (i=1; i<size; i++) { 3716 mend = browners[i+1] - browners[i]; 3717 if (i == size-1) mend = mend - extra_rows; 3718 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3719 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3720 /* calculate the number of nonzeros on each processor */ 3721 for (j=0; j<browners[i+1]-browners[i]; j++) { 3722 procsnz[i] += rowlengths[j]; 3723 } 3724 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3725 } 3726 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3727 } else { 3728 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3729 } 3730 3731 if (!rank) { 3732 /* determine max buffer needed and allocate it */ 3733 maxnz 
= procsnz[0]; 3734 for (i=1; i<size; i++) { 3735 maxnz = PetscMax(maxnz,procsnz[i]); 3736 } 3737 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3738 3739 /* read in my part of the matrix column indices */ 3740 nz = procsnz[0]; 3741 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3742 mycols = ibuf; 3743 if (size == 1) nz -= extra_rows; 3744 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3745 if (size == 1) { 3746 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3747 } 3748 3749 /* read in every ones (except the last) and ship off */ 3750 for (i=1; i<size-1; i++) { 3751 nz = procsnz[i]; 3752 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3753 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3754 } 3755 /* read in the stuff for the last proc */ 3756 if (size != 1) { 3757 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3758 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3759 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3760 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3761 } 3762 ierr = PetscFree(cols);CHKERRQ(ierr); 3763 } else { 3764 /* determine buffer space needed for message */ 3765 nz = 0; 3766 for (i=0; i<m; i++) { 3767 nz += locrowlens[i]; 3768 } 3769 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3770 mycols = ibuf; 3771 /* receive message of column indices*/ 3772 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3773 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3774 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3775 } 3776 3777 /* loop over local rows, determining number of off diagonal entries */ 3778 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3779 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3780 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3781 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3782 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3783 rowcount = 0; nzcount = 0; 3784 for (i=0; i<mbs; i++) { 3785 dcount = 0; 3786 odcount = 0; 3787 for (j=0; j<bs; j++) { 3788 kmax = locrowlens[rowcount]; 3789 for (k=0; k<kmax; k++) { 3790 tmp = mycols[nzcount++]/bs; 3791 if (!mask[tmp]) { 3792 mask[tmp] = 1; 3793 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3794 else masked1[dcount++] = tmp; 3795 } 3796 } 3797 rowcount++; 3798 } 3799 3800 dlens[i] = dcount; 3801 odlens[i] = odcount; 3802 3803 /* zero out the mask elements we set */ 3804 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3805 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3806 } 3807 3808 3809 if (!sizesset) { 3810 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3811 } 3812 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3813 3814 if (!rank) { 3815 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3816 /* read in my part of the matrix numerical values */ 3817 nz = procsnz[0]; 3818 vals = buf; 3819 mycols = ibuf; 3820 if (size == 1) nz -= extra_rows; 3821 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3822 if (size == 1) { 3823 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3824 } 3825 3826 /* insert into matrix */ 3827 jj = rstart*bs; 3828 for (i=0; i<m; i++) { 3829 ierr = 
MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3830 mycols += locrowlens[i]; 3831 vals += locrowlens[i]; 3832 jj++; 3833 } 3834 /* read in other processors (except the last one) and ship out */ 3835 for (i=1; i<size-1; i++) { 3836 nz = procsnz[i]; 3837 vals = buf; 3838 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3839 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3840 } 3841 /* the last proc */ 3842 if (size != 1) { 3843 nz = procsnz[i] - extra_rows; 3844 vals = buf; 3845 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3846 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3847 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3848 } 3849 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3850 } else { 3851 /* receive numeric values */ 3852 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3853 3854 /* receive message of values*/ 3855 vals = buf; 3856 mycols = ibuf; 3857 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3858 3859 /* insert into matrix */ 3860 jj = rstart*bs; 3861 for (i=0; i<m; i++) { 3862 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3863 mycols += locrowlens[i]; 3864 vals += locrowlens[i]; 3865 jj++; 3866 } 3867 } 3868 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3869 ierr = PetscFree(buf);CHKERRQ(ierr); 3870 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3871 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3872 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3873 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3874 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3875 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3876 PetscFunctionReturn(0); 3877 } 3878 3879 #undef __FUNCT__ 3880 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3881 /*@ 3882 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3883 3884 Input Parameters: 3885 . mat - the matrix 3886 . 
fact - the factor used to compute the hash table size 3887 3888 Not Collective, each process can use a different factor 3889 3890 Level: advanced 3891 3892 Notes: 3893 This can also be set by the command line option: -mat_use_hash_table <fact> 3894 3895 .keywords: matrix, hashtable, factor, HT 3896 3897 .seealso: MatSetOption() 3898 @*/ 3899 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3900 { 3901 PetscErrorCode ierr; 3902 3903 PetscFunctionBegin; 3904 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3905 PetscFunctionReturn(0); 3906 } 3907 3908 EXTERN_C_BEGIN 3909 #undef __FUNCT__ 3910 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3911 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3912 { 3913 Mat_MPIBAIJ *baij; 3914 3915 PetscFunctionBegin; 3916 baij = (Mat_MPIBAIJ*)mat->data; 3917 baij->ht_fact = fact; 3918 PetscFunctionReturn(0); 3919 } 3920 EXTERN_C_END 3921 3922 #undef __FUNCT__ 3923 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3924 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3925 { 3926 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3927 3928 PetscFunctionBegin; 3929 *Ad = a->A; 3930 *Ao = a->B; 3931 *colmap = a->garray; 3932 PetscFunctionReturn(0); 3933 } 3934 3935 /* 3936 Special version for direct calls from Fortran (to eliminate two function call overheads) 3937 */ 3938 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3939 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3940 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3941 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3942 #endif 3943 3944 #undef __FUNCT__ 3945 #define __FUNCT__ "matmpibaijsetvaluesblocked_" 3946 /*@C 3947 MatMPIBAIJSetValuesBlocked - Direct Fortran call that replaces a call to MatSetValuesBlocked() 3948 3949 Collective on Mat 3950 3951 Input Parameters: 3952 + mat - the matrix 3953 . min - number of input rows 3954 . im - input rows 3955 . nin - number of input columns 3956 . in - input columns 3957 . v - numerical values input 3958 - addvin - INSERT_VALUES or ADD_VALUES 3959 3960 Notes: This is a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is unfortunate code duplication.
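   For orientation, the same insertion performed through the generic C interface (which this
   routine shortcuts for Fortran callers) is essentially:
.vb
   MatSetValuesBlocked(mat,*min,im,*nin,in,v,*addvin);
.ve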
3961 3962 Level: advanced 3963 3964 .seealso: MatSetValuesBlocked() 3965 @*/ 3966 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3967 { 3968 /* convert input arguments to C version */ 3969 Mat mat = *matin; 3970 PetscInt m = *min, n = *nin; 3971 InsertMode addv = *addvin; 3972 3973 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3974 const MatScalar *value; 3975 MatScalar *barray = baij->barray; 3976 PetscBool roworiented = baij->roworiented; 3977 PetscErrorCode ierr; 3978 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3979 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3980 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3981 3982 PetscFunctionBegin; 3983 /* tasks normally handled by MatSetValuesBlocked() */ 3984 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3985 #if defined(PETSC_USE_DEBUG) 3986 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3987 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3988 #endif 3989 if (mat->assembled) { 3990 mat->was_assembled = PETSC_TRUE; 3991 mat->assembled = PETSC_FALSE; 3992 } 3993 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3994 3995 3996 if (!barray) { 3997 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3998 baij->barray = barray; 3999 } 4000 4001 if (roworiented) stepval = (n-1)*bs; 4002 else stepval = (m-1)*bs; 4003 4004 for (i=0; i<m; i++) { 4005 if (im[i] < 0) continue; 4006 #if defined(PETSC_USE_DEBUG) 4007 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4008 #endif 4009 if (im[i] >= rstart && im[i] < rend) { 4010 row = im[i] - rstart; 4011 for (j=0; j<n; j++) { 4012 /* If NumCol = 1 then a copy is not required */ 4013 if ((roworiented) && (n == 1)) { 4014 barray = (MatScalar*)v + i*bs2; 4015 } else if ((!roworiented) && (m == 1)) { 4016 barray = (MatScalar*)v + j*bs2; 4017 } else { /* Here a copy is required */ 4018 if (roworiented) { 4019 value = v + i*(stepval+bs)*bs + j*bs; 4020 } else { 4021 value = v + j*(stepval+bs)*bs + i*bs; 4022 } 4023 for (ii=0; ii<bs; ii++,value+=stepval) { 4024 for (jj=0; jj<bs; jj++) { 4025 *barray++ = *value++; 4026 } 4027 } 4028 barray -=bs2; 4029 } 4030 4031 if (in[j] >= cstart && in[j] < cend) { 4032 col = in[j] - cstart; 4033 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4034 } else if (in[j] < 0) continue; 4035 #if defined(PETSC_USE_DEBUG) 4036 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4037 #endif 4038 else { 4039 if (mat->was_assembled) { 4040 if (!baij->colmap) { 4041 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4042 } 4043 4044 #if defined(PETSC_USE_DEBUG) 4045 #if defined(PETSC_USE_CTABLE) 4046 { PetscInt data; 4047 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4048 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4049 } 4050 #else 4051 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4052 #endif 4053 #endif 4054 #if defined(PETSC_USE_CTABLE) 4055 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4056 col = (col - 1)/bs; 4057 #else 4058 col = (baij->colmap[in[j]] 
- 1)/bs; 4059 #endif 4060 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4061 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4062 col = in[j]; 4063 } 4064 } else col = in[j]; 4065 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4066 } 4067 } 4068 } else { 4069 if (!baij->donotstash) { 4070 if (roworiented) { 4071 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4072 } else { 4073 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4074 } 4075 } 4076 } 4077 } 4078 4079 /* task normally handled by MatSetValuesBlocked() */ 4080 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4081 PetscFunctionReturn(0); 4082 } 4083 4084 #undef __FUNCT__ 4085 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4086 /*@ 4087 MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows 4088 in standard CSR format. 4089 4090 Collective on MPI_Comm 4091 4092 Input Parameters: 4093 + comm - MPI communicator 4094 . bs - the block size, only a block size of 1 is supported 4095 . m - number of local rows (cannot be PETSC_DECIDE) 4096 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given). 4097 This value should be the same as the local size used in creating the 4098 x vector for the matrix-vector product y = Ax. For square matrices n is almost always m. 4099 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 4100 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 4101 . i - row indices 4102 . j - column indices 4103 - a - matrix values 4104 4105 Output Parameter: 4106 . mat - the matrix 4107 4108 Level: intermediate 4109 4110 Notes: 4111 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4112 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4113 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4114 4115 The i and j indices are 0 based, and the i indices are offsets into the local j array. 4116 4117 .keywords: matrix, aij, compressed row, sparse, parallel 4118 4119 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4120 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4121 @*/ 4122 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4123 { 4124 PetscErrorCode ierr; 4125 4126 PetscFunctionBegin; 4127 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4128 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative"); 4129 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4130 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4131 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 4132 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4133 PetscFunctionReturn(0); 4134 } 4135
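/*
   Illustrative sketch (not part of the library source above): one possible call to
   MatCreateMPIBAIJWithArrays() with bs = 1.  The local CSR arrays are placeholders standing in
   for whatever the application has assembled; each process passes its own rows, with 0-based
   global column indices.

       PetscInt    ii[] = {0,2,3};             // two local rows, with 2 and 1 nonzeros
       PetscInt    jj[] = {0,1,1};             // global column indices of those nonzeros
       PetscScalar aa[] = {2.0,-1.0,4.0};      // the corresponding values
       Mat         A;

       MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);
*/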