1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat); 7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 17 { 18 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 19 PetscErrorCode ierr; 20 PetscInt i,*idxb = 0; 21 PetscScalar *va,*vb; 22 Vec vtmp; 23 24 PetscFunctionBegin; 25 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 26 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 27 if (idx) { 28 for (i=0; i<A->rmap->n; i++) { 29 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 30 } 31 } 32 33 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 34 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 35 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 36 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 37 38 for (i=0; i<A->rmap->n; i++) { 39 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 40 va[i] = vb[i]; 41 if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs); 42 } 43 } 44 45 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 46 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 47 ierr = PetscFree(idxb);CHKERRQ(ierr); 48 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 52 #undef __FUNCT__ 53 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 54 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 55 { 56 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 57 PetscErrorCode ierr; 58 59 PetscFunctionBegin; 60 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 61 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 62 PetscFunctionReturn(0); 63 } 64 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 67 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 68 { 69 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 74 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 75 PetscFunctionReturn(0); 76 } 77 78 /* 79 Local utility routine that creates a mapping from the global column 80 number to the local number in the off-diagonal part of the local 81 storage of the matrix. This is done in a non scalable way since the 82 length of colmap equals the global matrix length. 83 */ 84 #undef __FUNCT__ 85 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 86 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 87 { 88 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 89 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 90 PetscErrorCode ierr; 91 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 92 93 PetscFunctionBegin; 94 #if defined(PETSC_USE_CTABLE) 95 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 96 for (i=0; i<nbs; i++) { 97 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 98 } 99 #else 100 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 101 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 102 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 103 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 104 #endif 105 PetscFunctionReturn(0); 106 } 107 108 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 109 { \ 110 \ 111 brow = row/bs; \ 112 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 113 rmax = aimax[brow]; nrow = ailen[brow]; \ 114 bcol = col/bs; \ 115 ridx = row % bs; cidx = col % bs; \ 116 low = 0; high = nrow; \ 117 while (high-low > 3) { \ 118 t = (low+high)/2; \ 119 if (rp[t] > bcol) high = t; \ 120 else low = t; \ 121 } \ 122 for (_i=low; _i<high; _i++) { \ 123 if (rp[_i] > bcol) break; \ 124 if (rp[_i] == bcol) { \ 125 bap = ap + bs2*_i + bs*cidx + ridx; \ 126 if (addv == ADD_VALUES) *bap += value; \ 127 else *bap = value; \ 128 goto a_noinsert; \ 129 } \ 130 } \ 131 if (a->nonew == 1) goto a_noinsert; \ 132 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 133 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 134 N = nrow++ - 1; \ 135 /* shift up all the later entries in this row */ \ 136 for (ii=N; ii>=_i; ii--) { \ 137 rp[ii+1] = rp[ii]; \ 138 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 139 } \ 140 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 141 rp[_i] = bcol; \ 142 ap[bs2*_i + bs*cidx + ridx] = value; \ 143 a_noinsert:; \ 144 ailen[brow] = nrow; \ 145 } 146 147 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 148 { \ 149 brow = row/bs; \ 150 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 151 rmax = bimax[brow]; nrow = bilen[brow]; \ 152 bcol = col/bs; \ 153 ridx = row % bs; cidx = col % bs; \ 154 low = 0; high = nrow; \ 155 while (high-low > 3) { \ 156 t = (low+high)/2; \ 157 if (rp[t] > bcol) high = t; \ 158 else low = t; \ 159 } \ 160 for (_i=low; _i<high; _i++) { \ 161 if (rp[_i] > bcol) break; \ 162 if (rp[_i] == bcol) { \ 163 bap = ap + bs2*_i + bs*cidx + ridx; \ 164 if (addv == ADD_VALUES) *bap += value; \ 165 else *bap = value; \ 166 goto b_noinsert; \ 167 } \ 168 } \ 169 if (b->nonew == 1) goto b_noinsert; \ 170 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 171 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 172 CHKMEMQ; \ 173 N = nrow++ - 1; \ 174 /* shift up all the later entries in this row */ \ 175 for (ii=N; ii>=_i; ii--) { \ 176 rp[ii+1] = rp[ii]; \ 177 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 178 } \ 179 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 180 rp[_i] = bcol; \ 181 ap[bs2*_i + bs*cidx + ridx] = value; \ 182 b_noinsert:; \ 183 bilen[brow] = nrow; \ 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatSetValues_MPIBAIJ" 188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 189 { 190 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 191 MatScalar value; 192 PetscBool roworiented = baij->roworiented; 193 PetscErrorCode ierr; 194 PetscInt i,j,row,col; 195 PetscInt rstart_orig=mat->rmap->rstart; 196 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 197 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 198 199 /* Some Variables required in the macro */ 200 Mat A = baij->A; 201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 202 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 203 MatScalar *aa =a->a; 204 205 Mat B = baij->B; 206 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 207 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 208 MatScalar *ba =b->a; 209 210 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 211 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 212 MatScalar *ap,*bap; 213 214 PetscFunctionBegin; 215 if (v) PetscValidScalarPointer(v,6); 216 for (i=0; i<m; i++) { 217 if (im[i] < 0) continue; 218 #if defined(PETSC_USE_DEBUG) 219 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 220 #endif 221 if (im[i] >= rstart_orig && im[i] < rend_orig) { 222 row = im[i] - rstart_orig; 223 for (j=0; j<n; j++) { 224 if (in[j] >= cstart_orig && in[j] < cend_orig) { 225 col = in[j] - cstart_orig; 226 if (roworiented) value = v[i*n+j]; 227 else value = v[i+j*m]; 228 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 229 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 230 } else if (in[j] < 0) continue; 231 #if defined(PETSC_USE_DEBUG) 232 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!baij->colmap) { 237 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 #if defined(PETSC_USE_CTABLE) 240 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 241 col = col - 1; 242 #else 243 col = baij->colmap[in[j]/bs] - 1; 244 #endif 245 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 246 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 247 col = in[j]; 248 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 249 B = baij->B; 250 b = (Mat_SeqBAIJ*)(B)->data; 251 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 252 ba =b->a; 253 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 254 else col += in[j]%bs; 255 } else col = in[j]; 256 if (roworiented) value = v[i*n+j]; 257 else value = v[i+j*m]; 258 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 259 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 260 } 261 } 262 } else { 263 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 264 if (!baij->donotstash) { 265 mat->assembled = PETSC_FALSE; 266 if (roworiented) { 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 268 } else { 269 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 270 } 271 } 272 } 273 } 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 279 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 280 { 281 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 282 const PetscScalar *value; 283 MatScalar *barray = baij->barray; 284 PetscBool roworiented = baij->roworiented; 285 PetscErrorCode ierr; 286 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 287 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 288 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 289 290 PetscFunctionBegin; 291 if (!barray) { 292 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 293 baij->barray = barray; 294 } 295 296 if (roworiented) stepval = (n-1)*bs; 297 else stepval = (m-1)*bs; 298 299 for (i=0; i<m; i++) { 300 if (im[i] < 0) continue; 301 #if defined(PETSC_USE_DEBUG) 302 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 303 #endif 304 if (im[i] >= rstart && im[i] < rend) { 305 row = im[i] - rstart; 306 for (j=0; j<n; j++) { 307 /* If NumCol = 1 then a copy is not required */ 308 if ((roworiented) && (n == 1)) { 309 barray = (MatScalar*)v + i*bs2; 310 } else if ((!roworiented) && (m == 1)) { 311 barray = (MatScalar*)v + j*bs2; 312 } else { /* Here a copy is required */ 313 if (roworiented) { 314 value = v + (i*(stepval+bs) + j)*bs; 315 } else { 316 value = v + (j*(stepval+bs) + i)*bs; 317 } 318 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 319 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 320 barray += bs; 321 } 322 barray -= bs2; 323 } 324 325 if (in[j] >= cstart && in[j] < cend) { 326 col = in[j] - cstart; 327 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 328 } else if (in[j] < 0) continue; 329 #if defined(PETSC_USE_DEBUG) 330 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 331 #endif 332 else { 333 if (mat->was_assembled) { 334 if (!baij->colmap) { 335 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 336 } 337 338 #if defined(PETSC_USE_DEBUG) 339 #if defined(PETSC_USE_CTABLE) 340 { PetscInt data; 341 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 342 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 343 } 344 #else 345 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 #endif 347 #endif 348 #if defined(PETSC_USE_CTABLE) 349 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 350 col = (col - 1)/bs; 351 #else 352 col = (baij->colmap[in[j]] - 1)/bs; 353 #endif 354 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 355 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 356 col = in[j]; 357 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 358 } else col = in[j]; 359 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 360 } 361 } 362 } else { 363 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 364 if (!baij->donotstash) { 365 if (roworiented) { 366 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 367 } else { 368 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 369 } 370 } 371 } 372 } 373 PetscFunctionReturn(0); 374 } 375 376 #define HASH_KEY 0.6180339887 377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 378 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 379 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 380 #undef __FUNCT__ 381 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 382 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 383 { 384 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 385 PetscBool roworiented = baij->roworiented; 386 PetscErrorCode ierr; 387 PetscInt i,j,row,col; 388 PetscInt rstart_orig=mat->rmap->rstart; 389 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 390 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 391 PetscReal tmp; 392 MatScalar **HD = baij->hd,value; 393 #if defined(PETSC_USE_DEBUG) 394 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 395 #endif 396 397 PetscFunctionBegin; 398 if (v) PetscValidScalarPointer(v,6); 399 for (i=0; i<m; i++) { 400 #if defined(PETSC_USE_DEBUG) 401 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 402 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 403 #endif 404 row = im[i]; 405 if (row >= rstart_orig && row < rend_orig) { 406 for (j=0; j<n; j++) { 407 col = in[j]; 408 if (roworiented) value = v[i*n+j]; 409 else value = v[i+j*m]; 410 /* Look up PetscInto the Hash Table */ 411 key = (row/bs)*Nbs+(col/bs)+1; 412 h1 = HASH(size,key,tmp); 413 414 415 idx = h1; 416 #if defined(PETSC_USE_DEBUG) 417 insert_ct++; 418 total_ct++; 419 if (HT[idx] != key) { 420 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 421 if (idx == size) { 422 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 423 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 424 } 425 } 426 #else 427 if (HT[idx] != key) { 428 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 429 if (idx == size) { 430 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 431 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 432 } 433 } 434 #endif 435 /* A HASH table entry is found, so insert the values at the correct address */ 436 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 437 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 438 } 439 } else if (!baij->donotstash) { 440 if (roworiented) { 441 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 442 } else { 443 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 444 } 445 } 446 } 447 #if defined(PETSC_USE_DEBUG) 448 baij->ht_total_ct = total_ct; 449 baij->ht_insert_ct = insert_ct; 450 #endif 451 PetscFunctionReturn(0); 452 } 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 456 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 457 { 458 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 459 PetscBool roworiented = baij->roworiented; 460 PetscErrorCode ierr; 461 PetscInt i,j,ii,jj,row,col; 462 PetscInt rstart=baij->rstartbs; 463 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 464 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 465 PetscReal tmp; 466 MatScalar **HD = baij->hd,*baij_a; 467 const PetscScalar *v_t,*value; 468 #if defined(PETSC_USE_DEBUG) 469 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 470 #endif 471 472 PetscFunctionBegin; 473 if (roworiented) stepval = (n-1)*bs; 474 else stepval = (m-1)*bs; 475 476 for (i=0; i<m; i++) { 477 #if defined(PETSC_USE_DEBUG) 478 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 479 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 480 #endif 481 row = im[i]; 482 v_t = v + i*nbs2; 483 if (row >= rstart && row < rend) { 484 for (j=0; j<n; j++) { 485 col = in[j]; 486 487 /* Look up into the Hash Table */ 488 key = row*Nbs+col+1; 489 h1 = HASH(size,key,tmp); 490 491 idx = h1; 492 #if defined(PETSC_USE_DEBUG) 493 total_ct++; 494 insert_ct++; 495 if (HT[idx] != key) { 496 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 497 if (idx == size) { 498 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 499 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 500 } 501 } 502 #else 503 if (HT[idx] != key) { 504 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 505 if (idx == size) { 506 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 507 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 508 } 509 } 510 #endif 511 baij_a = HD[idx]; 512 if (roworiented) { 513 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 514 /* value = v + (i*(stepval+bs)+j)*bs; */ 515 value = v_t; 516 v_t += bs; 517 if (addv == ADD_VALUES) { 518 for (ii=0; ii<bs; ii++,value+=stepval) { 519 for (jj=ii; jj<bs2; jj+=bs) { 520 baij_a[jj] += *value++; 521 } 522 } 523 } else { 524 for (ii=0; ii<bs; ii++,value+=stepval) { 525 for (jj=ii; jj<bs2; jj+=bs) { 526 baij_a[jj] = *value++; 527 } 528 } 529 } 530 } else { 531 value = v + j*(stepval+bs)*bs + i*bs; 532 if (addv == ADD_VALUES) { 533 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 534 for (jj=0; jj<bs; jj++) { 535 baij_a[jj] += *value++; 536 } 537 } 538 } else { 539 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 540 for (jj=0; jj<bs; jj++) { 541 baij_a[jj] = *value++; 542 } 543 } 544 } 545 } 546 } 547 } else { 548 if (!baij->donotstash) { 549 if (roworiented) { 550 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 #if defined(PETSC_USE_DEBUG) 558 baij->ht_total_ct = total_ct; 559 baij->ht_insert_ct = insert_ct; 560 #endif 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatGetValues_MPIBAIJ" 566 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 567 { 568 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 569 PetscErrorCode ierr; 570 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 571 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 572 573 PetscFunctionBegin; 574 for (i=0; i<m; i++) { 575 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 576 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 577 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 578 row = idxm[i] - bsrstart; 579 for (j=0; j<n; j++) { 580 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 581 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 582 if (idxn[j] >= bscstart && idxn[j] < bscend) { 583 col = idxn[j] - bscstart; 584 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 585 } else { 586 if (!baij->colmap) { 587 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 591 data--; 592 #else 593 data = baij->colmap[idxn[j]/bs]-1; 594 #endif 595 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 596 else { 597 col = data + idxn[j]%bs; 598 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 599 } 600 } 601 } 602 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatNorm_MPIBAIJ" 609 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 610 { 611 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 612 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 613 PetscErrorCode ierr; 614 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 615 PetscReal sum = 0.0; 616 MatScalar *v; 617 618 PetscFunctionBegin; 619 if (baij->size == 1) { 620 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 621 } else { 622 if (type == NORM_FROBENIUS) { 623 v = amat->a; 624 nz = amat->nz*bs2; 625 for (i=0; i<nz; i++) { 626 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 627 } 628 v = bmat->a; 629 nz = bmat->nz*bs2; 630 for (i=0; i<nz; i++) { 631 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 632 } 633 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 634 *nrm = PetscSqrtReal(*nrm); 635 } else if (type == NORM_1) { /* max column sum */ 636 PetscReal *tmp,*tmp2; 637 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 638 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 639 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 640 v = amat->a; jj = amat->j; 641 for (i=0; i<amat->nz; i++) { 642 for (j=0; j<bs; j++) { 643 col = bs*(cstart + *jj) + j; /* column index */ 644 for (row=0; row<bs; row++) { 645 tmp[col] += PetscAbsScalar(*v); v++; 646 } 647 } 648 jj++; 649 } 650 v = bmat->a; jj = bmat->j; 651 for (i=0; i<bmat->nz; i++) { 652 for (j=0; j<bs; j++) { 653 col = bs*garray[*jj] + j; 654 for (row=0; row<bs; row++) { 655 tmp[col] += PetscAbsScalar(*v); v++; 656 } 657 } 658 jj++; 659 } 660 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 661 *nrm = 0.0; 662 for (j=0; j<mat->cmap->N; j++) { 663 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 664 } 665 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 666 } else if (type == NORM_INFINITY) { /* max row sum */ 667 PetscReal *sums; 668 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 669 sum = 0.0; 670 for (j=0; j<amat->mbs; j++) { 671 for (row=0; row<bs; row++) sums[row] = 0.0; 672 v = amat->a + bs2*amat->i[j]; 673 nz = amat->i[j+1]-amat->i[j]; 674 for (i=0; i<nz; i++) { 675 for (col=0; col<bs; col++) { 676 for (row=0; row<bs; row++) { 677 sums[row] += PetscAbsScalar(*v); v++; 678 } 679 } 680 } 681 v = bmat->a + bs2*bmat->i[j]; 682 nz = bmat->i[j+1]-bmat->i[j]; 683 for (i=0; i<nz; i++) { 684 for (col=0; col<bs; col++) { 685 for (row=0; row<bs; row++) { 686 sums[row] += PetscAbsScalar(*v); v++; 687 } 688 } 689 } 690 for (row=0; row<bs; row++) { 691 if (sums[row] > sum) sum = sums[row]; 692 } 693 } 694 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 695 ierr = PetscFree(sums);CHKERRQ(ierr); 696 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 697 } 698 PetscFunctionReturn(0); 699 } 700 701 /* 702 Creates the hash table, and sets the table 703 This table is created only once. 704 If new entried need to be added to the matrix 705 then the hash table has to be destroyed and 706 recreated. 707 */ 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 710 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 711 { 712 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 713 Mat A = baij->A,B=baij->B; 714 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 715 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 716 PetscErrorCode ierr; 717 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 718 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 719 PetscInt *HT,key; 720 MatScalar **HD; 721 PetscReal tmp; 722 #if defined(PETSC_USE_INFO) 723 PetscInt ct=0,max=0; 724 #endif 725 726 PetscFunctionBegin; 727 if (baij->ht) PetscFunctionReturn(0); 728 729 baij->ht_size = (PetscInt)(factor*nz); 730 ht_size = baij->ht_size; 731 732 /* Allocate Memory for Hash Table */ 733 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 734 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 735 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 736 HD = baij->hd; 737 HT = baij->ht; 738 739 /* Loop Over A */ 740 for (i=0; i<a->mbs; i++) { 741 for (j=ai[i]; j<ai[i+1]; j++) { 742 row = i+rstart; 743 col = aj[j]+cstart; 744 745 key = row*Nbs + col + 1; 746 h1 = HASH(ht_size,key,tmp); 747 for (k=0; k<ht_size; k++) { 748 if (!HT[(h1+k)%ht_size]) { 749 HT[(h1+k)%ht_size] = key; 750 HD[(h1+k)%ht_size] = a->a + j*bs2; 751 break; 752 #if defined(PETSC_USE_INFO) 753 } else { 754 ct++; 755 #endif 756 } 757 } 758 #if defined(PETSC_USE_INFO) 759 if (k> max) max = k; 760 #endif 761 } 762 } 763 /* Loop Over B */ 764 for (i=0; i<b->mbs; i++) { 765 for (j=bi[i]; j<bi[i+1]; j++) { 766 row = i+rstart; 767 col = garray[bj[j]]; 768 key = row*Nbs + col + 1; 769 h1 = HASH(ht_size,key,tmp); 770 for (k=0; k<ht_size; k++) { 771 if (!HT[(h1+k)%ht_size]) { 772 HT[(h1+k)%ht_size] = key; 773 HD[(h1+k)%ht_size] = b->a + j*bs2; 774 break; 775 #if defined(PETSC_USE_INFO) 776 } else { 777 ct++; 778 #endif 779 } 780 } 781 #if defined(PETSC_USE_INFO) 782 if (k> max) max = k; 783 #endif 784 } 785 } 786 787 /* Print Summary */ 788 #if defined(PETSC_USE_INFO) 789 for (i=0,j=0; i<ht_size; i++) { 790 if (HT[i]) j++; 791 } 792 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 793 #endif 794 PetscFunctionReturn(0); 795 } 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 799 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 800 { 801 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 802 PetscErrorCode ierr; 803 PetscInt nstash,reallocs; 804 InsertMode addv; 805 806 PetscFunctionBegin; 807 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 808 809 /* make sure all processors are either in INSERTMODE or ADDMODE */ 810 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 811 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 812 mat->insertmode = addv; /* in case this processor had no cache */ 813 814 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 815 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 816 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 817 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 818 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 819 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNCT__ 824 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 825 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 826 { 827 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 828 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 829 PetscErrorCode ierr; 830 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 831 PetscInt *row,*col; 832 PetscBool r1,r2,r3,other_disassembled; 833 MatScalar *val; 834 InsertMode addv = mat->insertmode; 835 PetscMPIInt n; 836 837 PetscFunctionBegin; 838 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 839 if (!baij->donotstash && !mat->nooffprocentries) { 840 while (1) { 841 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 842 if (!flg) break; 843 844 for (i=0; i<n;) { 845 /* Now identify the consecutive vals belonging to the same row */ 846 for (j=i,rstart=row[j]; j<n; j++) { 847 if (row[j] != rstart) break; 848 } 849 if (j < n) ncols = j-i; 850 else ncols = n-i; 851 /* Now assemble all these values with a single function call */ 852 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 853 i = j; 854 } 855 } 856 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 857 /* Now process the block-stash. Since the values are stashed column-oriented, 858 set the roworiented flag to column oriented, and after MatSetValues() 859 restore the original flags */ 860 r1 = baij->roworiented; 861 r2 = a->roworiented; 862 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 863 864 baij->roworiented = PETSC_FALSE; 865 a->roworiented = PETSC_FALSE; 866 867 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 868 while (1) { 869 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 870 if (!flg) break; 871 872 for (i=0; i<n;) { 873 /* Now identify the consecutive vals belonging to the same row */ 874 for (j=i,rstart=row[j]; j<n; j++) { 875 if (row[j] != rstart) break; 876 } 877 if (j < n) ncols = j-i; 878 else ncols = n-i; 879 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 880 i = j; 881 } 882 } 883 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 884 885 baij->roworiented = r1; 886 a->roworiented = r2; 887 888 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 889 } 890 891 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 892 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 893 894 /* determine if any processor has disassembled, if so we must 895 also disassemble ourselfs, in order that we may reassemble. */ 896 /* 897 if nonzero structure of submatrix B cannot change then we know that 898 no processor disassembled thus we can skip this stuff 899 */ 900 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 901 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 902 if (mat->was_assembled && !other_disassembled) { 903 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 904 } 905 } 906 907 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 908 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 909 } 910 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 911 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 912 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 913 914 #if defined(PETSC_USE_INFO) 915 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 916 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 917 918 baij->ht_total_ct = 0; 919 baij->ht_insert_ct = 0; 920 } 921 #endif 922 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 923 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 924 925 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 926 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 927 } 928 929 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 930 931 baij->rowvalues = 0; 932 PetscFunctionReturn(0); 933 } 934 935 #include <petscdraw.h> 936 #undef __FUNCT__ 937 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 938 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 939 { 940 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 941 PetscErrorCode ierr; 942 PetscMPIInt size = baij->size,rank = baij->rank; 943 PetscInt bs = mat->rmap->bs; 944 PetscBool iascii,isdraw; 945 PetscViewer sviewer; 946 PetscViewerFormat format; 947 948 PetscFunctionBegin; 949 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 950 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 951 if (iascii) { 952 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 953 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 954 MatInfo info; 955 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 956 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 957 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 958 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 959 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 960 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 961 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 962 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 963 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 964 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 965 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 966 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 967 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } else if (format == PETSC_VIEWER_ASCII_INFO) { 970 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 971 PetscFunctionReturn(0); 972 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 973 PetscFunctionReturn(0); 974 } 975 } 976 977 if (isdraw) { 978 PetscDraw draw; 979 PetscBool isnull; 980 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 981 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 982 } 983 984 if (size == 1) { 985 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 986 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 987 } else { 988 /* assemble the entire matrix onto first processor. */ 989 Mat A; 990 Mat_SeqBAIJ *Aloc; 991 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 992 MatScalar *a; 993 994 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 995 /* Perhaps this should be the type of mat? */ 996 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 997 if (!rank) { 998 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 999 } else { 1000 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1001 } 1002 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1003 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1004 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1005 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1006 1007 /* copy over the A part */ 1008 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1009 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1010 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1011 1012 for (i=0; i<mbs; i++) { 1013 rvals[0] = bs*(baij->rstartbs + i); 1014 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1015 for (j=ai[i]; j<ai[i+1]; j++) { 1016 col = (baij->cstartbs+aj[j])*bs; 1017 for (k=0; k<bs; k++) { 1018 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1019 col++; a += bs; 1020 } 1021 } 1022 } 1023 /* copy over the B part */ 1024 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1025 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1026 for (i=0; i<mbs; i++) { 1027 rvals[0] = bs*(baij->rstartbs + i); 1028 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1029 for (j=ai[i]; j<ai[i+1]; j++) { 1030 col = baij->garray[aj[j]]*bs; 1031 for (k=0; k<bs; k++) { 1032 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1033 col++; a += bs; 1034 } 1035 } 1036 } 1037 ierr = PetscFree(rvals);CHKERRQ(ierr); 1038 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1039 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1040 /* 1041 Everyone has to call to draw the matrix since the graphics waits are 1042 synchronized across all processors that share the PetscDraw object 1043 */ 1044 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1045 if (!rank) { 1046 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1047 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1048 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1049 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1050 } 1051 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1052 ierr = MatDestroy(&A);CHKERRQ(ierr); 1053 } 1054 PetscFunctionReturn(0); 1055 } 1056 1057 #undef __FUNCT__ 1058 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1059 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1060 { 1061 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1062 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1063 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 1064 PetscErrorCode ierr; 1065 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1066 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1067 int fd; 1068 PetscScalar *column_values; 1069 FILE *file; 1070 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1071 PetscInt message_count,flowcontrolcount; 1072 1073 PetscFunctionBegin; 1074 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1075 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1076 nz = bs2*(A->nz + B->nz); 1077 rlen = mat->rmap->n; 1078 if (!rank) { 1079 header[0] = MAT_FILE_CLASSID; 1080 header[1] = mat->rmap->N; 1081 header[2] = mat->cmap->N; 1082 1083 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1084 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1085 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1086 /* get largest number of rows any processor has */ 1087 range = mat->rmap->range; 1088 for (i=1; i<size; i++) { 1089 rlen = PetscMax(rlen,range[i+1] - range[i]); 1090 } 1091 } else { 1092 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1093 } 1094 1095 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1096 /* compute lengths of each row */ 1097 for (i=0; i<a->mbs; i++) { 1098 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1099 } 1100 /* store the row lengths to the file */ 1101 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1102 if (!rank) { 1103 MPI_Status status; 1104 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1105 rlen = (range[1] - range[0])/bs; 1106 for (i=0; i<rlen; i++) { 1107 for (j=0; j<bs; j++) { 1108 row_lens[i*bs+j] = bs*crow_lens[i]; 1109 } 1110 } 1111 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1112 for (i=1; i<size; i++) { 1113 rlen = (range[i+1] - range[i])/bs; 1114 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1115 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1116 for (k=0; k<rlen; k++) { 1117 for (j=0; j<bs; j++) { 1118 row_lens[k*bs+j] = bs*crow_lens[k]; 1119 } 1120 } 1121 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1122 } 1123 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1124 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1125 } else { 1126 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1127 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1128 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1129 } 1130 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1131 1132 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1133 information needed to make it for each row from a block row. This does require more communication but still not more than 1134 the communication needed for the nonzero values */ 1135 nzmax = nz; /* space a largest processor needs */ 1136 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1137 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1138 cnt = 0; 1139 for (i=0; i<a->mbs; i++) { 1140 pcnt = cnt; 1141 for (j=B->i[i]; j<B->i[i+1]; j++) { 1142 if ((col = garray[B->j[j]]) > cstart) break; 1143 for (l=0; l<bs; l++) { 1144 column_indices[cnt++] = bs*col+l; 1145 } 1146 } 1147 for (k=A->i[i]; k<A->i[i+1]; k++) { 1148 for (l=0; l<bs; l++) { 1149 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1150 } 1151 } 1152 for (; j<B->i[i+1]; j++) { 1153 for (l=0; l<bs; l++) { 1154 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1155 } 1156 } 1157 len = cnt - pcnt; 1158 for (k=1; k<bs; k++) { 1159 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1160 cnt += len; 1161 } 1162 } 1163 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1164 1165 /* store the columns to the file */ 1166 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1167 if (!rank) { 1168 MPI_Status status; 1169 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1170 for (i=1; i<size; i++) { 1171 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1172 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1173 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1174 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1175 } 1176 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1177 } else { 1178 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1179 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1180 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1181 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1182 } 1183 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1184 1185 /* load up the numerical values */ 1186 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1187 cnt = 0; 1188 for (i=0; i<a->mbs; i++) { 1189 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1190 for (j=B->i[i]; j<B->i[i+1]; j++) { 1191 if (garray[B->j[j]] > cstart) break; 1192 for (l=0; l<bs; l++) { 1193 for (ll=0; ll<bs; ll++) { 1194 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1195 } 1196 } 1197 cnt += bs; 1198 } 1199 for (k=A->i[i]; k<A->i[i+1]; k++) { 1200 for (l=0; l<bs; l++) { 1201 for (ll=0; ll<bs; ll++) { 1202 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1203 } 1204 } 1205 cnt += bs; 1206 } 1207 for (; j<B->i[i+1]; j++) { 1208 for (l=0; l<bs; l++) { 1209 for (ll=0; ll<bs; ll++) { 1210 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1211 } 1212 } 1213 cnt += bs; 1214 } 1215 cnt += (bs-1)*rlen; 1216 } 1217 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1218 1219 /* store the column values to the file */ 1220 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1221 if (!rank) { 1222 MPI_Status status; 1223 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1224 for (i=1; i<size; i++) { 1225 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1226 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1227 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1228 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1229 } 1230 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1231 } else { 1232 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1233 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1234 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1235 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1236 } 1237 ierr = PetscFree(column_values);CHKERRQ(ierr); 1238 1239 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1240 if (file) { 1241 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1242 } 1243 PetscFunctionReturn(0); 1244 } 1245 1246 #undef __FUNCT__ 1247 #define __FUNCT__ "MatView_MPIBAIJ" 1248 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1249 { 1250 PetscErrorCode ierr; 1251 PetscBool iascii,isdraw,issocket,isbinary; 1252 1253 PetscFunctionBegin; 1254 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1255 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1256 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1257 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1258 if (iascii || isdraw || issocket) { 1259 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1260 } else if (isbinary) { 1261 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1262 } 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1268 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1269 { 1270 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 #if defined(PETSC_USE_LOG) 1275 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1276 #endif 1277 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1278 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1279 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1280 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1281 #if defined(PETSC_USE_CTABLE) 1282 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1283 #else 1284 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1285 #endif 1286 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1287 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1288 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1289 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1290 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1291 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1292 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1293 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1294 1295 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",NULL);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatMult_MPIBAIJ" 1310 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1311 { 1312 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1313 PetscErrorCode ierr; 1314 PetscInt nt; 1315 1316 PetscFunctionBegin; 1317 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1318 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1319 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1320 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1321 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1322 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1323 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1324 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1325 PetscFunctionReturn(0); 1326 } 1327 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1330 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1331 { 1332 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1333 PetscErrorCode ierr; 1334 1335 PetscFunctionBegin; 1336 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1337 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1338 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1339 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1345 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1346 { 1347 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1348 PetscErrorCode ierr; 1349 PetscBool merged; 1350 1351 PetscFunctionBegin; 1352 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1353 /* do nondiagonal part */ 1354 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1355 if (!merged) { 1356 /* send it on its way */ 1357 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1358 /* do local part */ 1359 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1360 /* receive remote parts: note this assumes the values are not actually */ 1361 /* inserted in yy until the next line */ 1362 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1363 } else { 1364 /* do local part */ 1365 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1366 /* send it on its way */ 1367 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1368 /* values actually were received in the Begin() but we need to call this nop */ 1369 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1370 } 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1376 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1377 { 1378 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1379 PetscErrorCode ierr; 1380 1381 PetscFunctionBegin; 1382 /* do nondiagonal part */ 1383 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1384 /* send it on its way */ 1385 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1386 /* do local part */ 1387 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1388 /* receive remote parts: note this assumes the values are not actually */ 1389 /* inserted in yy until the next line, which is true for my implementation*/ 1390 /* but is not perhaps always true. */ 1391 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1392 PetscFunctionReturn(0); 1393 } 1394 1395 /* 1396 This only works correctly for square matrices where the subblock A->A is the 1397 diagonal block 1398 */ 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1401 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1402 { 1403 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1404 PetscErrorCode ierr; 1405 1406 PetscFunctionBegin; 1407 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1408 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatScale_MPIBAIJ" 1414 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1415 { 1416 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1417 PetscErrorCode ierr; 1418 1419 PetscFunctionBegin; 1420 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1421 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1427 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1428 { 1429 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1430 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1431 PetscErrorCode ierr; 1432 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1433 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1434 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1435 1436 PetscFunctionBegin; 1437 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1438 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1439 mat->getrowactive = PETSC_TRUE; 1440 1441 if (!mat->rowvalues && (idx || v)) { 1442 /* 1443 allocate enough space to hold information from the longest row. 1444 */ 1445 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1446 PetscInt max = 1,mbs = mat->mbs,tmp; 1447 for (i=0; i<mbs; i++) { 1448 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1449 if (max < tmp) max = tmp; 1450 } 1451 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1452 } 1453 lrow = row - brstart; 1454 1455 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1456 if (!v) {pvA = 0; pvB = 0;} 1457 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1458 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1459 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1460 nztot = nzA + nzB; 1461 1462 cmap = mat->garray; 1463 if (v || idx) { 1464 if (nztot) { 1465 /* Sort by increasing column numbers, assuming A and B already sorted */ 1466 PetscInt imark = -1; 1467 if (v) { 1468 *v = v_p = mat->rowvalues; 1469 for (i=0; i<nzB; i++) { 1470 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1471 else break; 1472 } 1473 imark = i; 1474 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1475 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1476 } 1477 if (idx) { 1478 *idx = idx_p = mat->rowindices; 1479 if (imark > -1) { 1480 for (i=0; i<imark; i++) { 1481 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1482 } 1483 } else { 1484 for (i=0; i<nzB; i++) { 1485 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1486 else break; 1487 } 1488 imark = i; 1489 } 1490 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1491 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1492 } 1493 } else { 1494 if (idx) *idx = 0; 1495 if (v) *v = 0; 1496 } 1497 } 1498 *nz = nztot; 1499 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1500 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1506 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1507 { 1508 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1509 1510 PetscFunctionBegin; 1511 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1512 baij->getrowactive = PETSC_FALSE; 1513 PetscFunctionReturn(0); 1514 } 1515 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1518 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1519 { 1520 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1521 PetscErrorCode ierr; 1522 1523 PetscFunctionBegin; 1524 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1525 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1526 PetscFunctionReturn(0); 1527 } 1528 1529 #undef __FUNCT__ 1530 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1531 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1532 { 1533 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1534 Mat A = a->A,B = a->B; 1535 PetscErrorCode ierr; 1536 PetscReal isend[5],irecv[5]; 1537 1538 PetscFunctionBegin; 1539 info->block_size = (PetscReal)matin->rmap->bs; 1540 1541 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1542 1543 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1544 isend[3] = info->memory; isend[4] = info->mallocs; 1545 1546 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1547 1548 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1549 isend[3] += info->memory; isend[4] += info->mallocs; 1550 1551 if (flag == MAT_LOCAL) { 1552 info->nz_used = isend[0]; 1553 info->nz_allocated = isend[1]; 1554 info->nz_unneeded = isend[2]; 1555 info->memory = isend[3]; 1556 info->mallocs = isend[4]; 1557 } else if (flag == MAT_GLOBAL_MAX) { 1558 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1559 1560 info->nz_used = irecv[0]; 1561 info->nz_allocated = irecv[1]; 1562 info->nz_unneeded = irecv[2]; 1563 info->memory = irecv[3]; 1564 info->mallocs = irecv[4]; 1565 } else if (flag == MAT_GLOBAL_SUM) { 1566 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1567 1568 info->nz_used = irecv[0]; 1569 info->nz_allocated = irecv[1]; 1570 info->nz_unneeded = irecv[2]; 1571 info->memory = irecv[3]; 1572 info->mallocs = irecv[4]; 1573 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1574 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1575 info->fill_ratio_needed = 0; 1576 info->factor_mallocs = 0; 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1582 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1583 { 1584 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1585 PetscErrorCode ierr; 1586 1587 PetscFunctionBegin; 1588 switch (op) { 1589 case MAT_NEW_NONZERO_LOCATIONS: 1590 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1591 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1592 case MAT_KEEP_NONZERO_PATTERN: 1593 case MAT_NEW_NONZERO_LOCATION_ERR: 1594 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1595 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1596 break; 1597 case MAT_ROW_ORIENTED: 1598 a->roworiented = flg; 1599 1600 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1601 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1602 break; 1603 case MAT_NEW_DIAGONALS: 1604 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1605 break; 1606 case MAT_IGNORE_OFF_PROC_ENTRIES: 1607 a->donotstash = flg; 1608 break; 1609 case MAT_USE_HASH_TABLE: 1610 a->ht_flag = flg; 1611 break; 1612 case MAT_SYMMETRIC: 1613 case MAT_STRUCTURALLY_SYMMETRIC: 1614 case MAT_HERMITIAN: 1615 case MAT_SYMMETRY_ETERNAL: 1616 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1617 break; 1618 default: 1619 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1620 } 1621 PetscFunctionReturn(0); 1622 } 1623 1624 #undef __FUNCT__ 1625 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1626 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1627 { 1628 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1629 Mat_SeqBAIJ *Aloc; 1630 Mat B; 1631 PetscErrorCode ierr; 1632 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1633 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1634 MatScalar *a; 1635 1636 PetscFunctionBegin; 1637 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1638 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1639 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1640 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1641 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1642 /* Do not know preallocation information, but must set block size */ 1643 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1644 } else { 1645 B = *matout; 1646 } 1647 1648 /* copy over the A part */ 1649 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1650 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1651 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1652 1653 for (i=0; i<mbs; i++) { 1654 rvals[0] = bs*(baij->rstartbs + i); 1655 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1656 for (j=ai[i]; j<ai[i+1]; j++) { 1657 col = (baij->cstartbs+aj[j])*bs; 1658 for (k=0; k<bs; k++) { 1659 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1660 1661 col++; a += bs; 1662 } 1663 } 1664 } 1665 /* copy over the B part */ 1666 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1667 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1668 for (i=0; i<mbs; i++) { 1669 rvals[0] = bs*(baij->rstartbs + i); 1670 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1671 for (j=ai[i]; j<ai[i+1]; j++) { 1672 col = baij->garray[aj[j]]*bs; 1673 for (k=0; k<bs; k++) { 1674 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1675 col++; 1676 a += bs; 1677 } 1678 } 1679 } 1680 ierr = PetscFree(rvals);CHKERRQ(ierr); 1681 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1682 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1683 1684 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1685 else { 1686 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1687 } 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1693 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1694 { 1695 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1696 Mat a = baij->A,b = baij->B; 1697 PetscErrorCode ierr; 1698 PetscInt s1,s2,s3; 1699 1700 PetscFunctionBegin; 1701 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1702 if (rr) { 1703 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1704 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1705 /* Overlap communication with computation. */ 1706 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1707 } 1708 if (ll) { 1709 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1710 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1711 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1712 } 1713 /* scale the diagonal block */ 1714 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1715 1716 if (rr) { 1717 /* Do a scatter end and then right scale the off-diagonal block */ 1718 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1719 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1720 } 1721 PetscFunctionReturn(0); 1722 } 1723 1724 #undef __FUNCT__ 1725 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1726 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1727 { 1728 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1729 PetscErrorCode ierr; 1730 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1731 PetscInt i,*owners = A->rmap->range; 1732 PetscInt *nprocs,j,idx,nsends,row; 1733 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1734 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1735 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1736 MPI_Comm comm; 1737 MPI_Request *send_waits,*recv_waits; 1738 MPI_Status recv_status,*send_status; 1739 const PetscScalar *xx; 1740 PetscScalar *bb; 1741 #if defined(PETSC_DEBUG) 1742 PetscBool found = PETSC_FALSE; 1743 #endif 1744 1745 PetscFunctionBegin; 1746 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1747 /* first count number of contributors to each processor */ 1748 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1749 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1750 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1751 j = 0; 1752 for (i=0; i<N; i++) { 1753 if (lastidx > (idx = rows[i])) j = 0; 1754 lastidx = idx; 1755 for (; j<size; j++) { 1756 if (idx >= owners[j] && idx < owners[j+1]) { 1757 nprocs[2*j]++; 1758 nprocs[2*j+1] = 1; 1759 owner[i] = j; 1760 #if defined(PETSC_DEBUG) 1761 found = PETSC_TRUE; 1762 #endif 1763 break; 1764 } 1765 } 1766 #if defined(PETSC_DEBUG) 1767 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1768 found = PETSC_FALSE; 1769 #endif 1770 } 1771 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 1772 1773 if (A->nooffproczerorows) { 1774 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1775 nrecvs = nsends; 1776 nmax = N; 1777 } else { 1778 /* inform other processors of number of messages and max length*/ 1779 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1780 } 1781 1782 /* post receives: */ 1783 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1784 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1785 for (i=0; i<nrecvs; i++) { 1786 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1787 } 1788 1789 /* do sends: 1790 1) starts[i] gives the starting index in svalues for stuff going to 1791 the ith processor 1792 */ 1793 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1794 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1795 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1796 starts[0] = 0; 1797 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 1798 for (i=0; i<N; i++) { 1799 svalues[starts[owner[i]]++] = rows[i]; 1800 } 1801 1802 starts[0] = 0; 1803 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 1804 count = 0; 1805 for (i=0; i<size; i++) { 1806 if (nprocs[2*i+1]) { 1807 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1808 } 1809 } 1810 ierr = PetscFree(starts);CHKERRQ(ierr); 1811 1812 base = owners[rank]; 1813 1814 /* wait on receives */ 1815 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1816 count = nrecvs; 1817 slen = 0; 1818 while (count) { 1819 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1820 /* unpack receives into our local space */ 1821 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1822 1823 source[imdex] = recv_status.MPI_SOURCE; 1824 lens[imdex] = n; 1825 slen += n; 1826 count--; 1827 } 1828 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1829 1830 /* move the data into the send scatter */ 1831 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1832 count = 0; 1833 for (i=0; i<nrecvs; i++) { 1834 values = rvalues + i*nmax; 1835 for (j=0; j<lens[i]; j++) { 1836 lrows[count++] = values[j] - base; 1837 } 1838 } 1839 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1840 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1841 ierr = PetscFree(owner);CHKERRQ(ierr); 1842 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1843 1844 /* fix right hand side if needed */ 1845 if (x && b) { 1846 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1847 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1848 for (i=0; i<slen; i++) { 1849 bb[lrows[i]] = diag*xx[lrows[i]]; 1850 } 1851 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1852 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1853 } 1854 1855 /* actually zap the local rows */ 1856 /* 1857 Zero the required rows. If the "diagonal block" of the matrix 1858 is square and the user wishes to set the diagonal we use separate 1859 code so that MatSetValues() is not called for each diagonal allocating 1860 new memory, thus calling lots of mallocs and slowing things down. 1861 1862 */ 1863 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1864 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1865 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1866 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1867 } else if (diag != 0.0) { 1868 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1869 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1870 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1871 for (i=0; i<slen; i++) { 1872 row = lrows[i] + rstart_bs; 1873 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1874 } 1875 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1876 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1877 } else { 1878 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1879 } 1880 1881 ierr = PetscFree(lrows);CHKERRQ(ierr); 1882 1883 /* wait on sends */ 1884 if (nsends) { 1885 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1886 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1887 ierr = PetscFree(send_status);CHKERRQ(ierr); 1888 } 1889 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1890 ierr = PetscFree(svalues);CHKERRQ(ierr); 1891 PetscFunctionReturn(0); 1892 } 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1896 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1897 { 1898 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1899 PetscErrorCode ierr; 1900 1901 PetscFunctionBegin; 1902 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1903 PetscFunctionReturn(0); 1904 } 1905 1906 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1907 1908 #undef __FUNCT__ 1909 #define __FUNCT__ "MatEqual_MPIBAIJ" 1910 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1911 { 1912 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1913 Mat a,b,c,d; 1914 PetscBool flg; 1915 PetscErrorCode ierr; 1916 1917 PetscFunctionBegin; 1918 a = matA->A; b = matA->B; 1919 c = matB->A; d = matB->B; 1920 1921 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1922 if (flg) { 1923 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1924 } 1925 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1926 PetscFunctionReturn(0); 1927 } 1928 1929 #undef __FUNCT__ 1930 #define __FUNCT__ "MatCopy_MPIBAIJ" 1931 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1932 { 1933 PetscErrorCode ierr; 1934 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1935 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1936 1937 PetscFunctionBegin; 1938 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1939 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1940 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1941 } else { 1942 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1943 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1944 } 1945 PetscFunctionReturn(0); 1946 } 1947 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1950 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1951 { 1952 PetscErrorCode ierr; 1953 1954 PetscFunctionBegin; 1955 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1956 PetscFunctionReturn(0); 1957 } 1958 1959 #undef __FUNCT__ 1960 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1961 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1962 { 1963 PetscErrorCode ierr; 1964 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1965 PetscBLASInt bnz,one=1; 1966 Mat_SeqBAIJ *x,*y; 1967 1968 PetscFunctionBegin; 1969 if (str == SAME_NONZERO_PATTERN) { 1970 PetscScalar alpha = a; 1971 x = (Mat_SeqBAIJ*)xx->A->data; 1972 y = (Mat_SeqBAIJ*)yy->A->data; 1973 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1974 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1975 x = (Mat_SeqBAIJ*)xx->B->data; 1976 y = (Mat_SeqBAIJ*)yy->B->data; 1977 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1978 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1979 } else { 1980 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1981 } 1982 PetscFunctionReturn(0); 1983 } 1984 1985 #undef __FUNCT__ 1986 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1987 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1988 { 1989 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1990 PetscErrorCode ierr; 1991 1992 PetscFunctionBegin; 1993 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1994 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2000 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2001 { 2002 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2003 PetscErrorCode ierr; 2004 2005 PetscFunctionBegin; 2006 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2007 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2008 PetscFunctionReturn(0); 2009 } 2010 2011 #undef __FUNCT__ 2012 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2013 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2014 { 2015 PetscErrorCode ierr; 2016 IS iscol_local; 2017 PetscInt csize; 2018 2019 PetscFunctionBegin; 2020 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2021 if (call == MAT_REUSE_MATRIX) { 2022 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2023 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2024 } else { 2025 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2026 } 2027 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2028 if (call == MAT_INITIAL_MATRIX) { 2029 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2030 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2031 } 2032 PetscFunctionReturn(0); 2033 } 2034 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2037 /* 2038 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2039 in local and then by concatenating the local matrices the end result. 2040 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2041 */ 2042 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2043 { 2044 PetscErrorCode ierr; 2045 PetscMPIInt rank,size; 2046 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2047 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2048 Mat M,Mreuse; 2049 MatScalar *vwork,*aa; 2050 MPI_Comm comm; 2051 IS isrow_new, iscol_new; 2052 PetscBool idflag,allrows, allcols; 2053 Mat_SeqBAIJ *aij; 2054 2055 PetscFunctionBegin; 2056 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2057 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2058 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2059 /* The compression and expansion should be avoided. Doesn't point 2060 out errors, might change the indices, hence buggey */ 2061 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2062 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2063 2064 /* Check for special case: each processor gets entire matrix columns */ 2065 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2066 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2067 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 2068 else allcols = PETSC_FALSE; 2069 2070 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2071 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2072 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 2073 else allrows = PETSC_FALSE; 2074 2075 if (call == MAT_REUSE_MATRIX) { 2076 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2077 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2078 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2079 } else { 2080 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2081 } 2082 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2083 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2084 /* 2085 m - number of local rows 2086 n - number of columns (same on all processors) 2087 rstart - first row in new global matrix generated 2088 */ 2089 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2090 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2091 m = m/bs; 2092 n = n/bs; 2093 2094 if (call == MAT_INITIAL_MATRIX) { 2095 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2096 ii = aij->i; 2097 jj = aij->j; 2098 2099 /* 2100 Determine the number of non-zeros in the diagonal and off-diagonal 2101 portions of the matrix in order to do correct preallocation 2102 */ 2103 2104 /* first get start and end of "diagonal" columns */ 2105 if (csize == PETSC_DECIDE) { 2106 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2107 if (mglobal == n*bs) { /* square matrix */ 2108 nlocal = m; 2109 } else { 2110 nlocal = n/size + ((n % size) > rank); 2111 } 2112 } else { 2113 nlocal = csize/bs; 2114 } 2115 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2116 rstart = rend - nlocal; 2117 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2118 2119 /* next, compute all the lengths */ 2120 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2121 olens = dlens + m; 2122 for (i=0; i<m; i++) { 2123 jend = ii[i+1] - ii[i]; 2124 olen = 0; 2125 dlen = 0; 2126 for (j=0; j<jend; j++) { 2127 if (*jj < rstart || *jj >= rend) olen++; 2128 else dlen++; 2129 jj++; 2130 } 2131 olens[i] = olen; 2132 dlens[i] = dlen; 2133 } 2134 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2135 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2136 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2137 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2138 ierr = PetscFree(dlens);CHKERRQ(ierr); 2139 } else { 2140 PetscInt ml,nl; 2141 2142 M = *newmat; 2143 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2144 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2145 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2146 /* 2147 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2148 rather than the slower MatSetValues(). 2149 */ 2150 M->was_assembled = PETSC_TRUE; 2151 M->assembled = PETSC_FALSE; 2152 } 2153 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2154 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2155 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2156 ii = aij->i; 2157 jj = aij->j; 2158 aa = aij->a; 2159 for (i=0; i<m; i++) { 2160 row = rstart/bs + i; 2161 nz = ii[i+1] - ii[i]; 2162 cwork = jj; jj += nz; 2163 vwork = aa; aa += nz*bs*bs; 2164 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2165 } 2166 2167 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2168 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2169 *newmat = M; 2170 2171 /* save submatrix used in processor for next request */ 2172 if (call == MAT_INITIAL_MATRIX) { 2173 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2174 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2175 } 2176 PetscFunctionReturn(0); 2177 } 2178 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "MatPermute_MPIBAIJ" 2181 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2182 { 2183 MPI_Comm comm,pcomm; 2184 PetscInt first,rlocal_size,clocal_size,nrows; 2185 const PetscInt *rows; 2186 PetscMPIInt size; 2187 IS crowp,growp,irowp,lrowp,lcolp; 2188 PetscErrorCode ierr; 2189 2190 PetscFunctionBegin; 2191 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2192 /* make a collective version of 'rowp' */ 2193 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2194 if (pcomm==comm) { 2195 crowp = rowp; 2196 } else { 2197 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2198 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2199 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2200 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2201 } 2202 /* collect the global row permutation and invert it */ 2203 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2204 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2205 if (pcomm!=comm) { 2206 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2207 } 2208 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2209 ierr = ISDestroy(&growp);CHKERRQ(ierr); 2210 /* get the local target indices */ 2211 ierr = MatGetOwnershipRange(A,&first,NULL);CHKERRQ(ierr); 2212 ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr); 2213 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2214 ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2215 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2216 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2217 /* the column permutation is so much easier; 2218 make a local version of 'colp' and invert it */ 2219 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2220 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2221 if (size==1) { 2222 lcolp = colp; 2223 } else { 2224 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2225 } 2226 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2227 /* now we just get the submatrix */ 2228 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2229 if (size>1) { 2230 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2231 } 2232 /* clean up */ 2233 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2234 PetscFunctionReturn(0); 2235 } 2236 2237 #undef __FUNCT__ 2238 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2239 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2240 { 2241 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2242 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2243 2244 PetscFunctionBegin; 2245 if (nghosts) *nghosts = B->nbs; 2246 if (ghosts) *ghosts = baij->garray; 2247 PetscFunctionReturn(0); 2248 } 2249 2250 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat); 2251 2252 #undef __FUNCT__ 2253 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2254 /* 2255 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2256 */ 2257 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2258 { 2259 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2260 PetscErrorCode ierr; 2261 PetscMPIInt size,*ncolsonproc,*disp,nn; 2262 PetscInt bs,i,n,nrows,j,k,m,ncols,col; 2263 const PetscInt *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj,*ltog; 2264 PetscInt nis = iscoloring->n,nctot,*cols; 2265 PetscInt *rowhit,M,cstart,cend,colb; 2266 PetscInt *columnsforrow,l; 2267 IS *isa; 2268 PetscBool done,flg; 2269 ISLocalToGlobalMapping map = mat->cmap->bmapping; 2270 PetscInt ctype=c->ctype; 2271 2272 PetscFunctionBegin; 2273 if (!mat->assembled) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2274 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2275 2276 if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,<og);CHKERRQ(ierr);} 2277 else ltog = NULL; 2278 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2279 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2280 2281 M = mat->rmap->n/bs; 2282 cstart = mat->cmap->rstart/bs; 2283 cend = mat->cmap->rend/bs; 2284 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2285 c->N = mat->cmap->N/bs; 2286 c->m = mat->rmap->n/bs; 2287 c->rstart = mat->rmap->rstart/bs; 2288 2289 c->ncolors = nis; 2290 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2291 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2292 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2293 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2294 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2295 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2296 2297 /* Allow access to data structures of local part of matrix */ 2298 if (!baij->colmap) { 2299 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2300 } 2301 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2302 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2303 2304 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2305 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2306 2307 for (i=0; i<nis; i++) { 2308 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2309 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2310 2311 c->ncolumns[i] = n; 2312 if (n) { 2313 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2314 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2315 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2316 } else { 2317 c->columns[i] = 0; 2318 } 2319 2320 if (ctype == IS_COLORING_GLOBAL) { 2321 /* Determine the total (parallel) number of columns of this color */ 2322 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 2323 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2324 2325 ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr); 2326 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2327 nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j]; 2328 if (!nctot) { 2329 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2330 } 2331 2332 disp[0] = 0; 2333 for (j=1; j<size; j++) { 2334 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2335 } 2336 2337 /* Get complete list of columns for color on each processor */ 2338 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2339 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2340 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2341 } else if (ctype == IS_COLORING_GHOSTED) { 2342 /* Determine local number of columns of this color on this process, including ghost points */ 2343 nctot = n; 2344 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2345 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2346 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2347 2348 /* 2349 Mark all rows affect by these columns 2350 */ 2351 /* Temporary option to allow for debugging/testing */ 2352 flg = PETSC_FALSE; 2353 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 2354 if (!flg) { /*-----------------------------------------------------------------------------*/ 2355 /* crude, fast version */ 2356 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2357 /* loop over columns*/ 2358 for (j=0; j<nctot; j++) { 2359 if (ctype == IS_COLORING_GHOSTED) { 2360 col = ltog[cols[j]]; 2361 } else { 2362 col = cols[j]; 2363 } 2364 if (col >= cstart && col < cend) { 2365 /* column is in diagonal block of matrix */ 2366 rows = A_cj + A_ci[col-cstart]; 2367 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2368 } else { 2369 #if defined(PETSC_USE_CTABLE) 2370 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2371 colb--; 2372 #else 2373 colb = baij->colmap[col] - 1; 2374 #endif 2375 if (colb == -1) { 2376 m = 0; 2377 } else { 2378 colb = colb/bs; 2379 rows = B_cj + B_ci[colb]; 2380 m = B_ci[colb+1] - B_ci[colb]; 2381 } 2382 } 2383 /* loop over columns marking them in rowhit */ 2384 for (k=0; k<m; k++) { 2385 rowhit[*rows++] = col + 1; 2386 } 2387 } 2388 2389 /* count the number of hits */ 2390 nrows = 0; 2391 for (j=0; j<M; j++) { 2392 if (rowhit[j]) nrows++; 2393 } 2394 c->nrows[i] = nrows; 2395 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2396 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2397 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2398 nrows = 0; 2399 for (j=0; j<M; j++) { 2400 if (rowhit[j]) { 2401 c->rows[i][nrows] = j; 2402 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2403 nrows++; 2404 } 2405 } 2406 } else { /*-------------------------------------------------------------------------------*/ 2407 /* slow version, using rowhit as a linked list */ 2408 PetscInt currentcol,fm,mfm; 2409 rowhit[M] = M; 2410 nrows = 0; 2411 /* loop over columns*/ 2412 for (j=0; j<nctot; j++) { 2413 if (ctype == IS_COLORING_GHOSTED) { 2414 col = ltog[cols[j]]; 2415 } else { 2416 col = cols[j]; 2417 } 2418 if (col >= cstart && col < cend) { 2419 /* column is in diagonal block of matrix */ 2420 rows = A_cj + A_ci[col-cstart]; 2421 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2422 } else { 2423 #if defined(PETSC_USE_CTABLE) 2424 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2425 colb--; 2426 #else 2427 colb = baij->colmap[col] - 1; 2428 #endif 2429 if (colb == -1) { 2430 m = 0; 2431 } else { 2432 colb = colb/bs; 2433 rows = B_cj + B_ci[colb]; 2434 m = B_ci[colb+1] - B_ci[colb]; 2435 } 2436 } 2437 2438 /* loop over columns marking them in rowhit */ 2439 fm = M; /* fm points to first entry in linked list */ 2440 for (k=0; k<m; k++) { 2441 currentcol = *rows++; 2442 /* is it already in the list? */ 2443 do { 2444 mfm = fm; 2445 fm = rowhit[fm]; 2446 } while (fm < currentcol); 2447 /* not in list so add it */ 2448 if (fm != currentcol) { 2449 nrows++; 2450 columnsforrow[currentcol] = col; 2451 /* next three lines insert new entry into linked list */ 2452 rowhit[mfm] = currentcol; 2453 rowhit[currentcol] = fm; 2454 fm = currentcol; 2455 /* fm points to present position in list since we know the columns are sorted */ 2456 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2457 } 2458 } 2459 c->nrows[i] = nrows; 2460 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2461 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2462 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2463 /* now store the linked list of rows into c->rows[i] */ 2464 nrows = 0; 2465 fm = rowhit[M]; 2466 do { 2467 c->rows[i][nrows] = fm; 2468 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2469 fm = rowhit[fm]; 2470 } while (fm < M); 2471 } /* ---------------------------------------------------------------------------------------*/ 2472 ierr = PetscFree(cols);CHKERRQ(ierr); 2473 } 2474 2475 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2476 /* 2477 vscale will contain the "diagonal" on processor scalings followed by the off processor 2478 */ 2479 if (ctype == IS_COLORING_GLOBAL) { 2480 PetscInt *garray; 2481 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2482 for (i=0; i<baij->B->cmap->n/bs; i++) { 2483 for (j=0; j<bs; j++) { 2484 garray[i*bs+j] = bs*baij->garray[i]+j; 2485 } 2486 } 2487 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2488 ierr = PetscFree(garray);CHKERRQ(ierr); 2489 CHKMEMQ; 2490 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2491 for (k=0; k<c->ncolors; k++) { 2492 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2493 for (l=0; l<c->nrows[k]; l++) { 2494 col = c->columnsforrow[k][l]; 2495 if (col >= cstart && col < cend) { 2496 /* column is in diagonal block of matrix */ 2497 colb = col - cstart; 2498 } else { 2499 /* column is in "off-processor" part */ 2500 #if defined(PETSC_USE_CTABLE) 2501 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2502 colb--; 2503 #else 2504 colb = baij->colmap[col] - 1; 2505 #endif 2506 colb = colb/bs; 2507 colb += cend - cstart; 2508 } 2509 c->vscaleforrow[k][l] = colb; 2510 } 2511 } 2512 } else if (ctype == IS_COLORING_GHOSTED) { 2513 /* Get gtol mapping */ 2514 PetscInt N = mat->cmap->N,nlocal,*gtol; 2515 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2516 for (i=0; i<N; i++) gtol[i] = -1; 2517 ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr); 2518 for (i=0; i<nlocal; i++) gtol[ltog[i]] = i; 2519 2520 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2521 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2522 for (k=0; k<c->ncolors; k++) { 2523 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2524 for (l=0; l<c->nrows[k]; l++) { 2525 col = c->columnsforrow[k][l]; /* global column index */ 2526 2527 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2528 } 2529 } 2530 ierr = PetscFree(gtol);CHKERRQ(ierr); 2531 } 2532 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2533 2534 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2535 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2536 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2537 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2538 if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,<og);CHKERRQ(ierr);} 2539 CHKMEMQ; 2540 PetscFunctionReturn(0); 2541 } 2542 2543 #undef __FUNCT__ 2544 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2545 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2546 { 2547 Mat B; 2548 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2549 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2550 Mat_SeqAIJ *b; 2551 PetscErrorCode ierr; 2552 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2553 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2554 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2555 2556 PetscFunctionBegin; 2557 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2558 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2559 2560 /* ---------------------------------------------------------------- 2561 Tell every processor the number of nonzeros per row 2562 */ 2563 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2564 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2565 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2566 } 2567 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2568 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2569 displs = recvcounts + size; 2570 for (i=0; i<size; i++) { 2571 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2572 displs[i] = A->rmap->range[i]/bs; 2573 } 2574 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2575 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2576 #else 2577 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2578 #endif 2579 /* --------------------------------------------------------------- 2580 Create the sequential matrix of the same type as the local block diagonal 2581 */ 2582 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2583 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2584 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2585 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2586 b = (Mat_SeqAIJ*)B->data; 2587 2588 /*-------------------------------------------------------------------- 2589 Copy my part of matrix column indices over 2590 */ 2591 sendcount = ad->nz + bd->nz; 2592 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2593 a_jsendbuf = ad->j; 2594 b_jsendbuf = bd->j; 2595 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2596 cnt = 0; 2597 for (i=0; i<n; i++) { 2598 2599 /* put in lower diagonal portion */ 2600 m = bd->i[i+1] - bd->i[i]; 2601 while (m > 0) { 2602 /* is it above diagonal (in bd (compressed) numbering) */ 2603 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2604 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2605 m--; 2606 } 2607 2608 /* put in diagonal portion */ 2609 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2610 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2611 } 2612 2613 /* put in upper diagonal portion */ 2614 while (m-- > 0) { 2615 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2616 } 2617 } 2618 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2619 2620 /*-------------------------------------------------------------------- 2621 Gather all column indices to all processors 2622 */ 2623 for (i=0; i<size; i++) { 2624 recvcounts[i] = 0; 2625 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2626 recvcounts[i] += lens[j]; 2627 } 2628 } 2629 displs[0] = 0; 2630 for (i=1; i<size; i++) { 2631 displs[i] = displs[i-1] + recvcounts[i-1]; 2632 } 2633 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2634 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2635 #else 2636 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2637 #endif 2638 /*-------------------------------------------------------------------- 2639 Assemble the matrix into useable form (note numerical values not yet set) 2640 */ 2641 /* set the b->ilen (length of each row) values */ 2642 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2643 /* set the b->i indices */ 2644 b->i[0] = 0; 2645 for (i=1; i<=A->rmap->N/bs; i++) { 2646 b->i[i] = b->i[i-1] + lens[i-1]; 2647 } 2648 ierr = PetscFree(lens);CHKERRQ(ierr); 2649 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2650 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2651 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2652 2653 if (A->symmetric) { 2654 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2655 } else if (A->hermitian) { 2656 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2657 } else if (A->structurally_symmetric) { 2658 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2659 } 2660 *newmat = B; 2661 PetscFunctionReturn(0); 2662 } 2663 2664 #undef __FUNCT__ 2665 #define __FUNCT__ "MatSOR_MPIBAIJ" 2666 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2667 { 2668 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2669 PetscErrorCode ierr; 2670 Vec bb1 = 0; 2671 2672 PetscFunctionBegin; 2673 if (flag == SOR_APPLY_UPPER) { 2674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2675 PetscFunctionReturn(0); 2676 } 2677 2678 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2679 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2680 } 2681 2682 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2683 if (flag & SOR_ZERO_INITIAL_GUESS) { 2684 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2685 its--; 2686 } 2687 2688 while (its--) { 2689 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2690 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2691 2692 /* update rhs: bb1 = bb - B*x */ 2693 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2694 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2695 2696 /* local sweep */ 2697 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2698 } 2699 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2700 if (flag & SOR_ZERO_INITIAL_GUESS) { 2701 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2702 its--; 2703 } 2704 while (its--) { 2705 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2706 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2707 2708 /* update rhs: bb1 = bb - B*x */ 2709 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2710 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2711 2712 /* local sweep */ 2713 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2714 } 2715 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2716 if (flag & SOR_ZERO_INITIAL_GUESS) { 2717 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2718 its--; 2719 } 2720 while (its--) { 2721 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2722 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2723 2724 /* update rhs: bb1 = bb - B*x */ 2725 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2726 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2727 2728 /* local sweep */ 2729 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2730 } 2731 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2732 2733 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2734 PetscFunctionReturn(0); 2735 } 2736 2737 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2738 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2741 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2742 { 2743 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2744 PetscErrorCode ierr; 2745 2746 PetscFunctionBegin; 2747 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2748 PetscFunctionReturn(0); 2749 } 2750 2751 2752 /* -------------------------------------------------------------------*/ 2753 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2754 MatGetRow_MPIBAIJ, 2755 MatRestoreRow_MPIBAIJ, 2756 MatMult_MPIBAIJ, 2757 /* 4*/ MatMultAdd_MPIBAIJ, 2758 MatMultTranspose_MPIBAIJ, 2759 MatMultTransposeAdd_MPIBAIJ, 2760 0, 2761 0, 2762 0, 2763 /*10*/ 0, 2764 0, 2765 0, 2766 MatSOR_MPIBAIJ, 2767 MatTranspose_MPIBAIJ, 2768 /*15*/ MatGetInfo_MPIBAIJ, 2769 MatEqual_MPIBAIJ, 2770 MatGetDiagonal_MPIBAIJ, 2771 MatDiagonalScale_MPIBAIJ, 2772 MatNorm_MPIBAIJ, 2773 /*20*/ MatAssemblyBegin_MPIBAIJ, 2774 MatAssemblyEnd_MPIBAIJ, 2775 MatSetOption_MPIBAIJ, 2776 MatZeroEntries_MPIBAIJ, 2777 /*24*/ MatZeroRows_MPIBAIJ, 2778 0, 2779 0, 2780 0, 2781 0, 2782 /*29*/ MatSetUp_MPIBAIJ, 2783 0, 2784 0, 2785 0, 2786 0, 2787 /*34*/ MatDuplicate_MPIBAIJ, 2788 0, 2789 0, 2790 0, 2791 0, 2792 /*39*/ MatAXPY_MPIBAIJ, 2793 MatGetSubMatrices_MPIBAIJ, 2794 MatIncreaseOverlap_MPIBAIJ, 2795 MatGetValues_MPIBAIJ, 2796 MatCopy_MPIBAIJ, 2797 /*44*/ 0, 2798 MatScale_MPIBAIJ, 2799 0, 2800 0, 2801 0, 2802 /*49*/ 0, 2803 0, 2804 0, 2805 0, 2806 0, 2807 /*54*/ MatFDColoringCreate_MPIBAIJ, 2808 0, 2809 MatSetUnfactored_MPIBAIJ, 2810 MatPermute_MPIBAIJ, 2811 MatSetValuesBlocked_MPIBAIJ, 2812 /*59*/ MatGetSubMatrix_MPIBAIJ, 2813 MatDestroy_MPIBAIJ, 2814 MatView_MPIBAIJ, 2815 0, 2816 0, 2817 /*64*/ 0, 2818 0, 2819 0, 2820 0, 2821 0, 2822 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2823 0, 2824 0, 2825 0, 2826 0, 2827 /*74*/ 0, 2828 MatFDColoringApply_BAIJ, 2829 0, 2830 0, 2831 0, 2832 /*79*/ 0, 2833 0, 2834 0, 2835 0, 2836 MatLoad_MPIBAIJ, 2837 /*84*/ 0, 2838 0, 2839 0, 2840 0, 2841 0, 2842 /*89*/ 0, 2843 0, 2844 0, 2845 0, 2846 0, 2847 /*94*/ 0, 2848 0, 2849 0, 2850 0, 2851 0, 2852 /*99*/ 0, 2853 0, 2854 0, 2855 0, 2856 0, 2857 /*104*/0, 2858 MatRealPart_MPIBAIJ, 2859 MatImaginaryPart_MPIBAIJ, 2860 0, 2861 0, 2862 /*109*/0, 2863 0, 2864 0, 2865 0, 2866 0, 2867 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2868 0, 2869 MatGetGhosts_MPIBAIJ, 2870 0, 2871 0, 2872 /*119*/0, 2873 0, 2874 0, 2875 0, 2876 0, 2877 /*124*/0, 2878 0, 2879 MatInvertBlockDiagonal_MPIBAIJ, 2880 0, 2881 0, 2882 /*129*/ 0, 2883 0, 2884 0, 2885 0, 2886 0, 2887 /*134*/ 0, 2888 0, 2889 0, 2890 0, 2891 0, 2892 /*139*/ 0, 2893 0 2894 }; 2895 2896 #undef __FUNCT__ 2897 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2898 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2899 { 2900 PetscFunctionBegin; 2901 *a = ((Mat_MPIBAIJ*)A->data)->A; 2902 PetscFunctionReturn(0); 2903 } 2904 2905 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2906 2907 #undef __FUNCT__ 2908 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2909 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2910 { 2911 PetscInt m,rstart,cstart,cend; 2912 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2913 const PetscInt *JJ =0; 2914 PetscScalar *values=0; 2915 PetscErrorCode ierr; 2916 2917 PetscFunctionBegin; 2918 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2919 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2920 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2921 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2922 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2923 m = B->rmap->n/bs; 2924 rstart = B->rmap->rstart/bs; 2925 cstart = B->cmap->rstart/bs; 2926 cend = B->cmap->rend/bs; 2927 2928 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2929 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2930 for (i=0; i<m; i++) { 2931 nz = ii[i+1] - ii[i]; 2932 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2933 nz_max = PetscMax(nz_max,nz); 2934 JJ = jj + ii[i]; 2935 for (j=0; j<nz; j++) { 2936 if (*JJ >= cstart) break; 2937 JJ++; 2938 } 2939 d = 0; 2940 for (; j<nz; j++) { 2941 if (*JJ++ >= cend) break; 2942 d++; 2943 } 2944 d_nnz[i] = d; 2945 o_nnz[i] = nz - d; 2946 } 2947 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2948 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2949 2950 values = (PetscScalar*)V; 2951 if (!values) { 2952 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2953 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2954 } 2955 for (i=0; i<m; i++) { 2956 PetscInt row = i + rstart; 2957 PetscInt ncols = ii[i+1] - ii[i]; 2958 const PetscInt *icols = jj + ii[i]; 2959 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2960 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2961 } 2962 2963 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2964 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2965 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2966 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2967 PetscFunctionReturn(0); 2968 } 2969 2970 #undef __FUNCT__ 2971 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2972 /*@C 2973 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2974 (the default parallel PETSc format). 2975 2976 Collective on MPI_Comm 2977 2978 Input Parameters: 2979 + A - the matrix 2980 . bs - the block size 2981 . i - the indices into j for the start of each local row (starts with zero) 2982 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2983 - v - optional values in the matrix 2984 2985 Level: developer 2986 2987 .keywords: matrix, aij, compressed row, sparse, parallel 2988 2989 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2990 @*/ 2991 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2992 { 2993 PetscErrorCode ierr; 2994 2995 PetscFunctionBegin; 2996 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2997 PetscValidType(B,1); 2998 PetscValidLogicalCollectiveInt(B,bs,2); 2999 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3000 PetscFunctionReturn(0); 3001 } 3002 3003 #undef __FUNCT__ 3004 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 3005 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 3006 { 3007 Mat_MPIBAIJ *b; 3008 PetscErrorCode ierr; 3009 PetscInt i; 3010 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3011 3012 PetscFunctionBegin; 3013 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3014 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3015 3016 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3017 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3018 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3019 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3020 3021 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3022 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3023 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3024 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3025 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3026 3027 if (d_nnz) { 3028 for (i=0; i<B->rmap->n/bs; i++) { 3029 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3030 } 3031 } 3032 if (o_nnz) { 3033 for (i=0; i<B->rmap->n/bs; i++) { 3034 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3035 } 3036 } 3037 3038 b = (Mat_MPIBAIJ*)B->data; 3039 b->bs2 = bs*bs; 3040 b->mbs = B->rmap->n/bs; 3041 b->nbs = B->cmap->n/bs; 3042 b->Mbs = B->rmap->N/bs; 3043 b->Nbs = B->cmap->N/bs; 3044 3045 for (i=0; i<=b->size; i++) { 3046 b->rangebs[i] = B->rmap->range[i]/bs; 3047 } 3048 b->rstartbs = B->rmap->rstart/bs; 3049 b->rendbs = B->rmap->rend/bs; 3050 b->cstartbs = B->cmap->rstart/bs; 3051 b->cendbs = B->cmap->rend/bs; 3052 3053 if (!B->preallocated) { 3054 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3055 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3056 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3057 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3058 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3059 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3060 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3061 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3062 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 3063 } 3064 3065 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3066 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3067 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3068 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3069 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3070 B->preallocated = PETSC_TRUE; 3071 PetscFunctionReturn(0); 3072 } 3073 3074 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3075 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3076 3077 #undef __FUNCT__ 3078 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3079 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 3080 { 3081 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3082 PetscErrorCode ierr; 3083 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3084 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3085 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3086 3087 PetscFunctionBegin; 3088 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3089 ii[0] = 0; 3090 CHKMEMQ; 3091 for (i=0; i<M; i++) { 3092 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3093 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3094 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3095 /* remove one from count of matrix has diagonal */ 3096 for (j=id[i]; j<id[i+1]; j++) { 3097 if (jd[j] == i) {ii[i+1]--;break;} 3098 } 3099 CHKMEMQ; 3100 } 3101 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3102 cnt = 0; 3103 for (i=0; i<M; i++) { 3104 for (j=io[i]; j<io[i+1]; j++) { 3105 if (garray[jo[j]] > rstart) break; 3106 jj[cnt++] = garray[jo[j]]; 3107 CHKMEMQ; 3108 } 3109 for (k=id[i]; k<id[i+1]; k++) { 3110 if (jd[k] != i) { 3111 jj[cnt++] = rstart + jd[k]; 3112 CHKMEMQ; 3113 } 3114 } 3115 for (; j<io[i+1]; j++) { 3116 jj[cnt++] = garray[jo[j]]; 3117 CHKMEMQ; 3118 } 3119 } 3120 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 3121 PetscFunctionReturn(0); 3122 } 3123 3124 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3125 3126 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 3127 3128 #undef __FUNCT__ 3129 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3130 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 3131 { 3132 PetscErrorCode ierr; 3133 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3134 Mat B; 3135 Mat_MPIAIJ *b; 3136 3137 PetscFunctionBegin; 3138 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 3139 3140 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3141 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3142 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3143 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3144 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 3145 b = (Mat_MPIAIJ*) B->data; 3146 3147 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3148 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3149 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3150 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3151 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3152 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3153 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3154 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3155 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3156 if (reuse == MAT_REUSE_MATRIX) { 3157 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3158 } else { 3159 *newmat = B; 3160 } 3161 PetscFunctionReturn(0); 3162 } 3163 3164 #if defined(PETSC_HAVE_MUMPS) 3165 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3166 #endif 3167 3168 /*MC 3169 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3170 3171 Options Database Keys: 3172 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3173 . -mat_block_size <bs> - set the blocksize used to store the matrix 3174 - -mat_use_hash_table <fact> 3175 3176 Level: beginner 3177 3178 .seealso: MatCreateMPIBAIJ 3179 M*/ 3180 3181 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3182 3183 #undef __FUNCT__ 3184 #define __FUNCT__ "MatCreate_MPIBAIJ" 3185 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3186 { 3187 Mat_MPIBAIJ *b; 3188 PetscErrorCode ierr; 3189 PetscBool flg; 3190 3191 PetscFunctionBegin; 3192 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3193 B->data = (void*)b; 3194 3195 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3196 B->assembled = PETSC_FALSE; 3197 3198 B->insertmode = NOT_SET_VALUES; 3199 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3200 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3201 3202 /* build local table of row and column ownerships */ 3203 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3204 3205 /* build cache for off array entries formed */ 3206 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3207 3208 b->donotstash = PETSC_FALSE; 3209 b->colmap = NULL; 3210 b->garray = NULL; 3211 b->roworiented = PETSC_TRUE; 3212 3213 /* stuff used in block assembly */ 3214 b->barray = 0; 3215 3216 /* stuff used for matrix vector multiply */ 3217 b->lvec = 0; 3218 b->Mvctx = 0; 3219 3220 /* stuff for MatGetRow() */ 3221 b->rowindices = 0; 3222 b->rowvalues = 0; 3223 b->getrowactive = PETSC_FALSE; 3224 3225 /* hash table stuff */ 3226 b->ht = 0; 3227 b->hd = 0; 3228 b->ht_size = 0; 3229 b->ht_flag = PETSC_FALSE; 3230 b->ht_fact = 0; 3231 b->ht_total_ct = 0; 3232 b->ht_insert_ct = 0; 3233 3234 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3235 b->ijonly = PETSC_FALSE; 3236 3237 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3238 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 3239 if (flg) { 3240 PetscReal fact = 1.39; 3241 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3242 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3243 if (fact <= 1.0) fact = 1.39; 3244 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3245 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3246 } 3247 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3248 3249 #if defined(PETSC_HAVE_MUMPS) 3250 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3251 #endif 3252 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C","MatConvert_MPIBAIJ_MPIAdj",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3253 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C","MatConvert_MPIBAIJ_MPIAIJ",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3254 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C","MatConvert_MPIBAIJ_MPISBAIJ",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3255 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIBAIJ",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3256 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIBAIJ",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3257 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIBAIJ",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3258 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C","MatMPIBAIJSetPreallocation_MPIBAIJ",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3259 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C","MatMPIBAIJSetPreallocationCSR_MPIBAIJ",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3260 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIBAIJ",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3261 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C","MatSetHashTableFactor_MPIBAIJ",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3262 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C","MatConvert_MPIBAIJ_MPIBSTRM",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3263 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3264 PetscFunctionReturn(0); 3265 } 3266 3267 /*MC 3268 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3269 3270 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3271 and MATMPIBAIJ otherwise. 3272 3273 Options Database Keys: 3274 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3275 3276 Level: beginner 3277 3278 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3279 M*/ 3280 3281 #undef __FUNCT__ 3282 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3283 /*@C 3284 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3285 (block compressed row). For good matrix assembly performance 3286 the user should preallocate the matrix storage by setting the parameters 3287 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3288 performance can be increased by more than a factor of 50. 3289 3290 Collective on Mat 3291 3292 Input Parameters: 3293 + A - the matrix 3294 . bs - size of blockk 3295 . d_nz - number of block nonzeros per block row in diagonal portion of local 3296 submatrix (same for all local rows) 3297 . d_nnz - array containing the number of block nonzeros in the various block rows 3298 of the in diagonal portion of the local (possibly different for each block 3299 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3300 set it even if it is zero. 3301 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3302 submatrix (same for all local rows). 3303 - o_nnz - array containing the number of nonzeros in the various block rows of the 3304 off-diagonal portion of the local submatrix (possibly different for 3305 each block row) or NULL. 3306 3307 If the *_nnz parameter is given then the *_nz parameter is ignored 3308 3309 Options Database Keys: 3310 + -mat_block_size - size of the blocks to use 3311 - -mat_use_hash_table <fact> 3312 3313 Notes: 3314 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3315 than it must be used on all processors that share the object for that argument. 3316 3317 Storage Information: 3318 For a square global matrix we define each processor's diagonal portion 3319 to be its local rows and the corresponding columns (a square submatrix); 3320 each processor's off-diagonal portion encompasses the remainder of the 3321 local matrix (a rectangular submatrix). 3322 3323 The user can specify preallocated storage for the diagonal part of 3324 the local submatrix with either d_nz or d_nnz (not both). Set 3325 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3326 memory allocation. Likewise, specify preallocated storage for the 3327 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3328 3329 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3330 the figure below we depict these three local rows and all columns (0-11). 3331 3332 .vb 3333 0 1 2 3 4 5 6 7 8 9 10 11 3334 ------------------- 3335 row 3 | o o o d d d o o o o o o 3336 row 4 | o o o d d d o o o o o o 3337 row 5 | o o o d d d o o o o o o 3338 ------------------- 3339 .ve 3340 3341 Thus, any entries in the d locations are stored in the d (diagonal) 3342 submatrix, and any entries in the o locations are stored in the 3343 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3344 stored simply in the MATSEQBAIJ format for compressed row storage. 3345 3346 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3347 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3348 In general, for PDE problems in which most nonzeros are near the diagonal, 3349 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3350 or you will get TERRIBLE performance; see the users' manual chapter on 3351 matrices. 3352 3353 You can call MatGetInfo() to get information on how effective the preallocation was; 3354 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3355 You can also run with the option -info and look for messages with the string 3356 malloc in them to see if additional memory allocation was needed. 3357 3358 Level: intermediate 3359 3360 .keywords: matrix, block, aij, compressed row, sparse, parallel 3361 3362 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3363 @*/ 3364 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3365 { 3366 PetscErrorCode ierr; 3367 3368 PetscFunctionBegin; 3369 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3370 PetscValidType(B,1); 3371 PetscValidLogicalCollectiveInt(B,bs,2); 3372 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3373 PetscFunctionReturn(0); 3374 } 3375 3376 #undef __FUNCT__ 3377 #define __FUNCT__ "MatCreateBAIJ" 3378 /*@C 3379 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3380 (block compressed row). For good matrix assembly performance 3381 the user should preallocate the matrix storage by setting the parameters 3382 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3383 performance can be increased by more than a factor of 50. 3384 3385 Collective on MPI_Comm 3386 3387 Input Parameters: 3388 + comm - MPI communicator 3389 . bs - size of blockk 3390 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3391 This value should be the same as the local size used in creating the 3392 y vector for the matrix-vector product y = Ax. 3393 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3394 This value should be the same as the local size used in creating the 3395 x vector for the matrix-vector product y = Ax. 3396 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3397 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3398 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3399 submatrix (same for all local rows) 3400 . d_nnz - array containing the number of nonzero blocks in the various block rows 3401 of the in diagonal portion of the local (possibly different for each block 3402 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3403 and set it even if it is zero. 3404 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3405 submatrix (same for all local rows). 3406 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3407 off-diagonal portion of the local submatrix (possibly different for 3408 each block row) or NULL. 3409 3410 Output Parameter: 3411 . A - the matrix 3412 3413 Options Database Keys: 3414 + -mat_block_size - size of the blocks to use 3415 - -mat_use_hash_table <fact> 3416 3417 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3418 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3419 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3420 3421 Notes: 3422 If the *_nnz parameter is given then the *_nz parameter is ignored 3423 3424 A nonzero block is any block that as 1 or more nonzeros in it 3425 3426 The user MUST specify either the local or global matrix dimensions 3427 (possibly both). 3428 3429 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3430 than it must be used on all processors that share the object for that argument. 3431 3432 Storage Information: 3433 For a square global matrix we define each processor's diagonal portion 3434 to be its local rows and the corresponding columns (a square submatrix); 3435 each processor's off-diagonal portion encompasses the remainder of the 3436 local matrix (a rectangular submatrix). 3437 3438 The user can specify preallocated storage for the diagonal part of 3439 the local submatrix with either d_nz or d_nnz (not both). Set 3440 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3441 memory allocation. Likewise, specify preallocated storage for the 3442 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3443 3444 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3445 the figure below we depict these three local rows and all columns (0-11). 3446 3447 .vb 3448 0 1 2 3 4 5 6 7 8 9 10 11 3449 ------------------- 3450 row 3 | o o o d d d o o o o o o 3451 row 4 | o o o d d d o o o o o o 3452 row 5 | o o o d d d o o o o o o 3453 ------------------- 3454 .ve 3455 3456 Thus, any entries in the d locations are stored in the d (diagonal) 3457 submatrix, and any entries in the o locations are stored in the 3458 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3459 stored simply in the MATSEQBAIJ format for compressed row storage. 3460 3461 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3462 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3463 In general, for PDE problems in which most nonzeros are near the diagonal, 3464 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3465 or you will get TERRIBLE performance; see the users' manual chapter on 3466 matrices. 3467 3468 Level: intermediate 3469 3470 .keywords: matrix, block, aij, compressed row, sparse, parallel 3471 3472 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3473 @*/ 3474 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3475 { 3476 PetscErrorCode ierr; 3477 PetscMPIInt size; 3478 3479 PetscFunctionBegin; 3480 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3481 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3482 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3483 if (size > 1) { 3484 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3485 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3486 } else { 3487 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3488 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3489 } 3490 PetscFunctionReturn(0); 3491 } 3492 3493 #undef __FUNCT__ 3494 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3495 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3496 { 3497 Mat mat; 3498 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3499 PetscErrorCode ierr; 3500 PetscInt len=0; 3501 3502 PetscFunctionBegin; 3503 *newmat = 0; 3504 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3505 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3506 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3507 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3508 3509 mat->factortype = matin->factortype; 3510 mat->preallocated = PETSC_TRUE; 3511 mat->assembled = PETSC_TRUE; 3512 mat->insertmode = NOT_SET_VALUES; 3513 3514 a = (Mat_MPIBAIJ*)mat->data; 3515 mat->rmap->bs = matin->rmap->bs; 3516 a->bs2 = oldmat->bs2; 3517 a->mbs = oldmat->mbs; 3518 a->nbs = oldmat->nbs; 3519 a->Mbs = oldmat->Mbs; 3520 a->Nbs = oldmat->Nbs; 3521 3522 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3523 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3524 3525 a->size = oldmat->size; 3526 a->rank = oldmat->rank; 3527 a->donotstash = oldmat->donotstash; 3528 a->roworiented = oldmat->roworiented; 3529 a->rowindices = 0; 3530 a->rowvalues = 0; 3531 a->getrowactive = PETSC_FALSE; 3532 a->barray = 0; 3533 a->rstartbs = oldmat->rstartbs; 3534 a->rendbs = oldmat->rendbs; 3535 a->cstartbs = oldmat->cstartbs; 3536 a->cendbs = oldmat->cendbs; 3537 3538 /* hash table stuff */ 3539 a->ht = 0; 3540 a->hd = 0; 3541 a->ht_size = 0; 3542 a->ht_flag = oldmat->ht_flag; 3543 a->ht_fact = oldmat->ht_fact; 3544 a->ht_total_ct = 0; 3545 a->ht_insert_ct = 0; 3546 3547 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3548 if (oldmat->colmap) { 3549 #if defined(PETSC_USE_CTABLE) 3550 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3551 #else 3552 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3553 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3554 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3555 #endif 3556 } else a->colmap = 0; 3557 3558 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3559 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3560 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3561 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3562 } else a->garray = 0; 3563 3564 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3565 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3566 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3567 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3568 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3569 3570 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3571 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3572 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3573 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3574 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3575 *newmat = mat; 3576 PetscFunctionReturn(0); 3577 } 3578 3579 #undef __FUNCT__ 3580 #define __FUNCT__ "MatLoad_MPIBAIJ" 3581 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3582 { 3583 PetscErrorCode ierr; 3584 int fd; 3585 PetscInt i,nz,j,rstart,rend; 3586 PetscScalar *vals,*buf; 3587 MPI_Comm comm; 3588 MPI_Status status; 3589 PetscMPIInt rank,size,maxnz; 3590 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3591 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3592 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3593 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3594 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3595 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3596 3597 PetscFunctionBegin; 3598 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3599 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3600 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3601 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3602 3603 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3604 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3605 if (!rank) { 3606 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3607 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3608 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3609 } 3610 3611 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3612 3613 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3614 M = header[1]; N = header[2]; 3615 3616 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3617 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3618 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3619 3620 /* If global sizes are set, check if they are consistent with that given in the file */ 3621 if (sizesset) { 3622 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3623 } 3624 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3625 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3626 3627 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3628 3629 /* 3630 This code adds extra rows to make sure the number of rows is 3631 divisible by the blocksize 3632 */ 3633 Mbs = M/bs; 3634 extra_rows = bs - M + bs*Mbs; 3635 if (extra_rows == bs) extra_rows = 0; 3636 else Mbs++; 3637 if (extra_rows && !rank) { 3638 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3639 } 3640 3641 /* determine ownership of all rows */ 3642 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3643 mbs = Mbs/size + ((Mbs % size) > rank); 3644 m = mbs*bs; 3645 } else { /* User set */ 3646 m = newmat->rmap->n; 3647 mbs = m/bs; 3648 } 3649 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3650 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3651 3652 /* process 0 needs enough room for process with most rows */ 3653 if (!rank) { 3654 mmax = rowners[1]; 3655 for (i=2; i<=size; i++) { 3656 mmax = PetscMax(mmax,rowners[i]); 3657 } 3658 mmax*=bs; 3659 } else mmax = -1; /* unused, but compiler warns anyway */ 3660 3661 rowners[0] = 0; 3662 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3663 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3664 rstart = rowners[rank]; 3665 rend = rowners[rank+1]; 3666 3667 /* distribute row lengths to all processors */ 3668 ierr = PetscMalloc(m*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3669 if (!rank) { 3670 mend = m; 3671 if (size == 1) mend = mend - extra_rows; 3672 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3673 for (j=mend; j<m; j++) locrowlens[j] = 1; 3674 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3675 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3676 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3677 for (j=0; j<m; j++) { 3678 procsnz[0] += locrowlens[j]; 3679 } 3680 for (i=1; i<size; i++) { 3681 mend = browners[i+1] - browners[i]; 3682 if (i == size-1) mend = mend - extra_rows; 3683 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3684 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3685 /* calculate the number of nonzeros on each processor */ 3686 for (j=0; j<browners[i+1]-browners[i]; j++) { 3687 procsnz[i] += rowlengths[j]; 3688 } 3689 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3690 } 3691 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3692 } else { 3693 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3694 } 3695 3696 if (!rank) { 3697 /* determine max buffer needed and allocate it */ 3698 maxnz = procsnz[0]; 3699 for (i=1; i<size; i++) { 3700 maxnz = PetscMax(maxnz,procsnz[i]); 3701 } 3702 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3703 3704 /* read in my part of the matrix column indices */ 3705 nz = procsnz[0]; 3706 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3707 mycols = ibuf; 3708 if (size == 1) nz -= extra_rows; 3709 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3710 if (size == 1) { 3711 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3712 } 3713 3714 /* read in every ones (except the last) and ship off */ 3715 for (i=1; i<size-1; i++) { 3716 nz = procsnz[i]; 3717 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3718 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3719 } 3720 /* read in the stuff for the last proc */ 3721 if (size != 1) { 3722 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3723 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3724 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3725 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3726 } 3727 ierr = PetscFree(cols);CHKERRQ(ierr); 3728 } else { 3729 /* determine buffer space needed for message */ 3730 nz = 0; 3731 for (i=0; i<m; i++) { 3732 nz += locrowlens[i]; 3733 } 3734 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3735 mycols = ibuf; 3736 /* receive message of column indices*/ 3737 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3738 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3739 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3740 } 3741 3742 /* loop over local rows, determining number of off diagonal entries */ 3743 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3744 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3745 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3746 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3747 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3748 rowcount = 0; nzcount = 0; 3749 for (i=0; i<mbs; i++) { 3750 dcount = 0; 3751 odcount = 0; 3752 for (j=0; j<bs; j++) { 3753 kmax = locrowlens[rowcount]; 3754 for (k=0; k<kmax; k++) { 3755 tmp = mycols[nzcount++]/bs; 3756 if (!mask[tmp]) { 3757 mask[tmp] = 1; 3758 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3759 else masked1[dcount++] = tmp; 3760 } 3761 } 3762 rowcount++; 3763 } 3764 3765 dlens[i] = dcount; 3766 odlens[i] = odcount; 3767 3768 /* zero out the mask elements we set */ 3769 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3770 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3771 } 3772 3773 3774 if (!sizesset) { 3775 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3776 } 3777 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3778 3779 if (!rank) { 3780 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3781 /* read in my part of the matrix numerical values */ 3782 nz = procsnz[0]; 3783 vals = buf; 3784 mycols = ibuf; 3785 if (size == 1) nz -= extra_rows; 3786 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3787 if (size == 1) { 3788 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3789 } 3790 3791 /* insert into matrix */ 3792 jj = rstart*bs; 3793 for (i=0; i<m; i++) { 3794 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3795 mycols += locrowlens[i]; 3796 vals += locrowlens[i]; 3797 jj++; 3798 } 3799 /* read in other processors (except the last one) and ship out */ 3800 for (i=1; i<size-1; i++) { 3801 nz = procsnz[i]; 3802 vals = buf; 3803 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3804 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3805 } 3806 /* the last proc */ 3807 if (size != 1) { 3808 nz = procsnz[i] - extra_rows; 3809 vals = buf; 3810 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3811 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3812 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3813 } 3814 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3815 } else { 3816 /* receive numeric values */ 3817 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3818 3819 /* receive message of values*/ 3820 vals = buf; 3821 mycols = ibuf; 3822 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3823 3824 /* insert into matrix */ 3825 jj = rstart*bs; 3826 for (i=0; i<m; i++) { 3827 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3828 mycols += locrowlens[i]; 3829 vals += locrowlens[i]; 3830 jj++; 3831 } 3832 } 3833 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3834 ierr = PetscFree(buf);CHKERRQ(ierr); 3835 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3836 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3837 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3838 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3839 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3840 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3841 PetscFunctionReturn(0); 3842 } 3843 3844 #undef __FUNCT__ 3845 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3846 /*@ 3847 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3848 3849 Input Parameters: 3850 . mat - the matrix 3851 . fact - factor 3852 3853 Not Collective, each process can use a different factor 3854 3855 Level: advanced 3856 3857 Notes: 3858 This can also be set by the command line option: -mat_use_hash_table <fact> 3859 3860 .keywords: matrix, hashtable, factor, HT 3861 3862 .seealso: MatSetOption() 3863 @*/ 3864 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3865 { 3866 PetscErrorCode ierr; 3867 3868 PetscFunctionBegin; 3869 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3870 PetscFunctionReturn(0); 3871 } 3872 3873 #undef __FUNCT__ 3874 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3875 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3876 { 3877 Mat_MPIBAIJ *baij; 3878 3879 PetscFunctionBegin; 3880 baij = (Mat_MPIBAIJ*)mat->data; 3881 baij->ht_fact = fact; 3882 PetscFunctionReturn(0); 3883 } 3884 3885 #undef __FUNCT__ 3886 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3887 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3888 { 3889 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3890 3891 PetscFunctionBegin; 3892 *Ad = a->A; 3893 *Ao = a->B; 3894 *colmap = a->garray; 3895 PetscFunctionReturn(0); 3896 } 3897 3898 /* 3899 Special version for direct calls from Fortran (to eliminate two function call overheads 3900 */ 3901 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3902 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3903 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3904 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3905 #endif 3906 3907 #undef __FUNCT__ 3908 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3909 /*@C 3910 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3911 3912 Collective on Mat 3913 3914 Input Parameters: 3915 + mat - the matrix 3916 . min - number of input rows 3917 . im - input rows 3918 . nin - number of input columns 3919 . in - input columns 3920 . v - numerical values input 3921 - addvin - INSERT_VALUES or ADD_VALUES 3922 3923 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3924 3925 Level: advanced 3926 3927 .seealso: MatSetValuesBlocked() 3928 @*/ 3929 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3930 { 3931 /* convert input arguments to C version */ 3932 Mat mat = *matin; 3933 PetscInt m = *min, n = *nin; 3934 InsertMode addv = *addvin; 3935 3936 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3937 const MatScalar *value; 3938 MatScalar *barray = baij->barray; 3939 PetscBool roworiented = baij->roworiented; 3940 PetscErrorCode ierr; 3941 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3942 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3943 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3944 3945 PetscFunctionBegin; 3946 /* tasks normally handled by MatSetValuesBlocked() */ 3947 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3948 #if defined(PETSC_USE_DEBUG) 3949 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3950 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3951 #endif 3952 if (mat->assembled) { 3953 mat->was_assembled = PETSC_TRUE; 3954 mat->assembled = PETSC_FALSE; 3955 } 3956 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3957 3958 3959 if (!barray) { 3960 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3961 baij->barray = barray; 3962 } 3963 3964 if (roworiented) stepval = (n-1)*bs; 3965 else stepval = (m-1)*bs; 3966 3967 for (i=0; i<m; i++) { 3968 if (im[i] < 0) continue; 3969 #if defined(PETSC_USE_DEBUG) 3970 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3971 #endif 3972 if (im[i] >= rstart && im[i] < rend) { 3973 row = im[i] - rstart; 3974 for (j=0; j<n; j++) { 3975 /* If NumCol = 1 then a copy is not required */ 3976 if ((roworiented) && (n == 1)) { 3977 barray = (MatScalar*)v + i*bs2; 3978 } else if ((!roworiented) && (m == 1)) { 3979 barray = (MatScalar*)v + j*bs2; 3980 } else { /* Here a copy is required */ 3981 if (roworiented) { 3982 value = v + i*(stepval+bs)*bs + j*bs; 3983 } else { 3984 value = v + j*(stepval+bs)*bs + i*bs; 3985 } 3986 for (ii=0; ii<bs; ii++,value+=stepval) { 3987 for (jj=0; jj<bs; jj++) { 3988 *barray++ = *value++; 3989 } 3990 } 3991 barray -=bs2; 3992 } 3993 3994 if (in[j] >= cstart && in[j] < cend) { 3995 col = in[j] - cstart; 3996 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3997 } else if (in[j] < 0) continue; 3998 #if defined(PETSC_USE_DEBUG) 3999 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4000 #endif 4001 else { 4002 if (mat->was_assembled) { 4003 if (!baij->colmap) { 4004 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4005 } 4006 4007 #if defined(PETSC_USE_DEBUG) 4008 #if defined(PETSC_USE_CTABLE) 4009 { PetscInt data; 4010 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4011 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4012 } 4013 #else 4014 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4015 #endif 4016 #endif 4017 #if defined(PETSC_USE_CTABLE) 4018 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4019 col = (col - 1)/bs; 4020 #else 4021 col = (baij->colmap[in[j]] - 1)/bs; 4022 #endif 4023 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4024 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4025 col = in[j]; 4026 } 4027 } else col = in[j]; 4028 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4029 } 4030 } 4031 } else { 4032 if (!baij->donotstash) { 4033 if (roworiented) { 4034 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4035 } else { 4036 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4037 } 4038 } 4039 } 4040 } 4041 4042 /* task normally handled by MatSetValuesBlocked() */ 4043 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4044 PetscFunctionReturn(0); 4045 } 4046 4047 #undef __FUNCT__ 4048 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4049 /*@ 4050 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 4051 CSR format the local rows. 4052 4053 Collective on MPI_Comm 4054 4055 Input Parameters: 4056 + comm - MPI communicator 4057 . bs - the block size, only a block size of 1 is supported 4058 . m - number of local rows (Cannot be PETSC_DECIDE) 4059 . n - This value should be the same as the local size used in creating the 4060 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4061 calculated if N is given) For square matrices n is almost always m. 4062 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4063 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4064 . i - row indices 4065 . j - column indices 4066 - a - matrix values 4067 4068 Output Parameter: 4069 . mat - the matrix 4070 4071 Level: intermediate 4072 4073 Notes: 4074 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4075 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4076 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4077 4078 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4079 4080 .keywords: matrix, aij, compressed row, sparse, parallel 4081 4082 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4083 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4084 @*/ 4085 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4086 { 4087 PetscErrorCode ierr; 4088 4089 PetscFunctionBegin; 4090 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4091 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4092 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4093 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4094 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 4095 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4096 PetscFunctionReturn(0); 4097 } 4098