#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/

#include <petscblaslapack.h>
#include <petscsf.h>

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
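/*
   For illustration only (not part of the routine below; gcol/lcol are
   placeholder names): once built, the map is consulted during MatSetValues()
   roughly as

     #if defined(PETSC_USE_CTABLE)
       PetscTableFind(baij->colmap,gcol/bs + 1,&lcol); lcol--;
     #else
       lcol = baij->colmap[gcol/bs] - 1;
     #endif

   where gcol is a global column index and a negative lcol means that block
   column is not (yet) present in the off-diagonal part B.
*/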
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
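/*
   The two macros above are the per-entry insertion kernels for the diagonal (A)
   and off-diagonal (B) sequential blocks: they locate the block column with a
   short bisection followed by a linear scan, write the scalar into the existing
   bs-by-bs block if it is present, and otherwise shift the later blocks of the
   row up to open a slot for the new block column. MatSetValues_MPIBAIJ() below
   expands them with the A/B arrays loaded into the local variables declared at
   the top of that function.
*/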
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

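/*
   The routines below provide an optional hash-table fast path for MatSetValues()
   and MatSetValuesBlocked(), enabled with MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE)
   (see MatSetOption_MPIBAIJ() and MatAssemblyEnd_MPIBAIJ() below). Each preallocated
   bs-by-bs block is keyed by brow*Nbs + bcol + 1; HASH() maps the key to a slot by
   multiplying with the golden-ratio constant HASH_KEY, and collisions are resolved
   with the linear probing set up in MatCreateHashTable_MPIBAIJ_Private().
*/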
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
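/*
   Typical assembly sequence for this format (a minimal sketch, not taken from this
   file; the sizes and nonzero counts are illustrative placeholders):

     MatCreate(comm,&mat);
     MatSetSizes(mat,m,n,M,N);
     MatSetType(mat,MATMPIBAIJ);
     MatMPIBAIJSetPreallocation(mat,bs,d_nz,NULL,o_nz,NULL);
     ... loop calling MatSetValuesBlocked(mat,1,&brow,1,&bcol,vals,INSERT_VALUES); ...
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);

   The MatAssemblyBegin/End calls are what invoke MatAssemblyBegin_MPIBAIJ() and
   MatAssemblyEnd_MPIBAIJ() above to exchange any stashed off-process entries.
*/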
extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
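/*
   MatView_MPIBAIJ_Binary() below writes the matrix in the standard PETSc binary
   format: a 4-entry header (MAT_FILE_CLASSID, global rows, global columns, total
   number of nonzeros), followed by the per-row nonzero counts, the column indices,
   and finally the numerical values, all expanded from blocked storage to scalar
   CSR form. Rank 0 gathers the data from the other ranks with the viewer
   flow-control helpers so the file is written in global row order.
*/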
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
     information needed to make it for each row from a block row. This does require more communication but still not more than
     the communication needed for the nonzero values */
This does require more communication but still not more than 1118 the communication needed for the nonzero values */ 1119 nzmax = nz; /* space a largest processor needs */ 1120 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1121 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1122 cnt = 0; 1123 for (i=0; i<a->mbs; i++) { 1124 pcnt = cnt; 1125 for (j=B->i[i]; j<B->i[i+1]; j++) { 1126 if ((col = garray[B->j[j]]) > cstart) break; 1127 for (l=0; l<bs; l++) { 1128 column_indices[cnt++] = bs*col+l; 1129 } 1130 } 1131 for (k=A->i[i]; k<A->i[i+1]; k++) { 1132 for (l=0; l<bs; l++) { 1133 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1134 } 1135 } 1136 for (; j<B->i[i+1]; j++) { 1137 for (l=0; l<bs; l++) { 1138 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1139 } 1140 } 1141 len = cnt - pcnt; 1142 for (k=1; k<bs; k++) { 1143 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1144 cnt += len; 1145 } 1146 } 1147 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1148 1149 /* store the columns to the file */ 1150 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1151 if (!rank) { 1152 MPI_Status status; 1153 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1154 for (i=1; i<size; i++) { 1155 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1156 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1157 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1158 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1159 } 1160 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1161 } else { 1162 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1163 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1164 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1165 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1166 } 1167 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1168 1169 /* load up the numerical values */ 1170 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1171 cnt = 0; 1172 for (i=0; i<a->mbs; i++) { 1173 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1174 for (j=B->i[i]; j<B->i[i+1]; j++) { 1175 if (garray[B->j[j]] > cstart) break; 1176 for (l=0; l<bs; l++) { 1177 for (ll=0; ll<bs; ll++) { 1178 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1179 } 1180 } 1181 cnt += bs; 1182 } 1183 for (k=A->i[i]; k<A->i[i+1]; k++) { 1184 for (l=0; l<bs; l++) { 1185 for (ll=0; ll<bs; ll++) { 1186 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1187 } 1188 } 1189 cnt += bs; 1190 } 1191 for (; j<B->i[i+1]; j++) { 1192 for (l=0; l<bs; l++) { 1193 for (ll=0; ll<bs; ll++) { 1194 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1195 } 1196 } 1197 cnt += bs; 1198 } 1199 cnt += (bs-1)*rlen; 1200 } 1201 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1202 1203 /* store the column values to the file */ 1204 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1205 if (!rank) { 1206 MPI_Status status; 1207 ierr = 
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
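/*
   Note on the multiply kernels above and below: the scatter of the needed
   off-process entries of xx into a->lvec is started first, the local (diagonal
   block) product is computed while the messages are in flight, and the
   off-diagonal contribution B*lvec is added once VecScatterEnd() completes,
   so communication is overlapped with computation.
*/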
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
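/*
   MatGetRow_MPIBAIJ() below returns a locally owned row as a single list of
   global column indices and values: it fetches the row from both the diagonal
   (A) and off-diagonal (B) sequential blocks and merges them so the columns
   appear in increasing global order, using garray to translate B's compressed
   block columns back to global indices.
*/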
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
isend[4] += info->mallocs; 1534 1535 if (flag == MAT_LOCAL) { 1536 info->nz_used = isend[0]; 1537 info->nz_allocated = isend[1]; 1538 info->nz_unneeded = isend[2]; 1539 info->memory = isend[3]; 1540 info->mallocs = isend[4]; 1541 } else if (flag == MAT_GLOBAL_MAX) { 1542 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1543 1544 info->nz_used = irecv[0]; 1545 info->nz_allocated = irecv[1]; 1546 info->nz_unneeded = irecv[2]; 1547 info->memory = irecv[3]; 1548 info->mallocs = irecv[4]; 1549 } else if (flag == MAT_GLOBAL_SUM) { 1550 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1551 1552 info->nz_used = irecv[0]; 1553 info->nz_allocated = irecv[1]; 1554 info->nz_unneeded = irecv[2]; 1555 info->memory = irecv[3]; 1556 info->mallocs = irecv[4]; 1557 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1558 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1559 info->fill_ratio_needed = 0; 1560 info->factor_mallocs = 0; 1561 PetscFunctionReturn(0); 1562 } 1563 1564 #undef __FUNCT__ 1565 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1566 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1567 { 1568 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1569 PetscErrorCode ierr; 1570 1571 PetscFunctionBegin; 1572 switch (op) { 1573 case MAT_NEW_NONZERO_LOCATIONS: 1574 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1575 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1576 case MAT_KEEP_NONZERO_PATTERN: 1577 case MAT_NEW_NONZERO_LOCATION_ERR: 1578 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1579 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1580 break; 1581 case MAT_ROW_ORIENTED: 1582 a->roworiented = flg; 1583 1584 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1585 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1586 break; 1587 case MAT_NEW_DIAGONALS: 1588 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1589 break; 1590 case MAT_IGNORE_OFF_PROC_ENTRIES: 1591 a->donotstash = flg; 1592 break; 1593 case MAT_USE_HASH_TABLE: 1594 a->ht_flag = flg; 1595 break; 1596 case MAT_SYMMETRIC: 1597 case MAT_STRUCTURALLY_SYMMETRIC: 1598 case MAT_HERMITIAN: 1599 case MAT_SYMMETRY_ETERNAL: 1600 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1601 break; 1602 default: 1603 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1604 } 1605 PetscFunctionReturn(0); 1606 } 1607 1608 #undef __FUNCT__ 1609 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1610 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1611 { 1612 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1613 Mat_SeqBAIJ *Aloc; 1614 Mat B; 1615 PetscErrorCode ierr; 1616 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1617 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1618 MatScalar *a; 1619 1620 PetscFunctionBegin; 1621 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1622 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1623 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1624 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1625 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1626 /* Do not know preallocation information, but must set block size */ 1627 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1628 } else { 
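/* MAT_REUSE_MATRIX with a separate output matrix: transpose into the caller-supplied matrix */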
1629 B = *matout; 1630 } 1631 1632 /* copy over the A part */ 1633 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1634 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1635 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1636 1637 for (i=0; i<mbs; i++) { 1638 rvals[0] = bs*(baij->rstartbs + i); 1639 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1640 for (j=ai[i]; j<ai[i+1]; j++) { 1641 col = (baij->cstartbs+aj[j])*bs; 1642 for (k=0; k<bs; k++) { 1643 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1644 1645 col++; a += bs; 1646 } 1647 } 1648 } 1649 /* copy over the B part */ 1650 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1651 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1652 for (i=0; i<mbs; i++) { 1653 rvals[0] = bs*(baij->rstartbs + i); 1654 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1655 for (j=ai[i]; j<ai[i+1]; j++) { 1656 col = baij->garray[aj[j]]*bs; 1657 for (k=0; k<bs; k++) { 1658 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1659 col++; 1660 a += bs; 1661 } 1662 } 1663 } 1664 ierr = PetscFree(rvals);CHKERRQ(ierr); 1665 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1666 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1667 1668 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1669 else { 1670 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1671 } 1672 PetscFunctionReturn(0); 1673 } 1674 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1677 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1678 { 1679 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1680 Mat a = baij->A,b = baij->B; 1681 PetscErrorCode ierr; 1682 PetscInt s1,s2,s3; 1683 1684 PetscFunctionBegin; 1685 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1686 if (rr) { 1687 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1688 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1689 /* Overlap communication with computation. 
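Begin scattering rr into lvec here, scale the diagonal block (and left-scale the off-diagonal block) while the messages are in flight, then complete the scatter and right-scale the off-diagonal block below.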
*/ 1690 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1691 } 1692 if (ll) { 1693 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1694 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1695 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1696 } 1697 /* scale the diagonal block */ 1698 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1699 1700 if (rr) { 1701 /* Do a scatter end and then right scale the off-diagonal block */ 1702 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1703 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1704 } 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1710 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1711 { 1712 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1713 PetscInt *owners = A->rmap->range; 1714 PetscInt n = A->rmap->n; 1715 PetscSF sf; 1716 PetscInt *lrows; 1717 PetscSFNode *rrows; 1718 PetscInt r, p = 0, len = 0; 1719 PetscErrorCode ierr; 1720 1721 PetscFunctionBegin; 1722 /* Create SF where leaves are input rows and roots are owned rows */ 1723 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1724 for (r = 0; r < n; ++r) lrows[r] = -1; 1725 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 1726 for (r = 0; r < N; ++r) { 1727 const PetscInt idx = rows[r]; 1728 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1729 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1730 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1731 } 1732 if (A->nooffproczerorows) { 1733 if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank); 1734 lrows[len++] = idx - owners[p]; 1735 } else { 1736 rrows[r].rank = p; 1737 rrows[r].index = rows[r] - owners[p]; 1738 } 1739 } 1740 if (!A->nooffproczerorows) { 1741 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1742 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1743 /* Collect flags for rows to be zeroed */ 1744 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1745 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1746 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1747 /* Compress and put in row numbers */ 1748 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1749 } 1750 /* fix right hand side if needed */ 1751 if (x && b) { 1752 const PetscScalar *xx; 1753 PetscScalar *bb; 1754 1755 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1756 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1757 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1758 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1759 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1760 } 1761 1762 /* actually zap the local rows */ 1763 /* 1764 Zero the required rows. If the "diagonal block" of the matrix 1765 is square and the user wishes to set the diagonal we use separate 1766 code so that MatSetValues() is not called for each diagonal allocating 1767 new memory, thus calling lots of mallocs and slowing things down. 
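When the diagonal block is not square, the diagonal entries are instead inserted one at a time with MatSetValues() below.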
1768 1769 */ 1770 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1771 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1772 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1773 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1774 } else if (diag != 0.0) { 1775 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1776 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1777 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1778 for (r = 0; r < len; ++r) { 1779 const PetscInt row = lrows[r] + A->rmap->rstart; 1780 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1781 } 1782 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1783 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1784 } else { 1785 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1786 } 1787 ierr = PetscFree(lrows);CHKERRQ(ierr); 1788 1789 /* only change matrix nonzero state if pattern was allowed to be changed */ 1790 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1791 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1792 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1793 } 1794 PetscFunctionReturn(0); 1795 } 1796 1797 #undef __FUNCT__ 1798 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ" 1799 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1800 { 1801 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1802 PetscErrorCode ierr; 1803 PetscMPIInt n = A->rmap->n; 1804 PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 1805 PetscInt *lrows,*owners = A->rmap->range; 1806 PetscSFNode *rrows; 1807 PetscSF sf; 1808 const PetscScalar *xx; 1809 PetscScalar *bb,*mask; 1810 Vec xmask,lmask; 1811 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1812 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1813 PetscScalar *aa; 1814 1815 PetscFunctionBegin; 1816 /* Create SF where leaves are input rows and roots are owned rows */ 1817 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1818 for (r = 0; r < n; ++r) lrows[r] = -1; 1819 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1820 for (r = 0; r < N; ++r) { 1821 const PetscInt idx = rows[r]; 1822 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1823 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1824 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1825 } 1826 rrows[r].rank = p; 1827 rrows[r].index = rows[r] - owners[p]; 1828 } 1829 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1830 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1831 /* Collect flags for rows to be zeroed */ 1832 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1833 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1834 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1835 /* Compress and put in row numbers */ 1836 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1837 /* zero diagonal part of matrix */ 1838 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1839 /* 
handle off diagonal part of matrix */ 1840 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1841 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1842 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1843 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1844 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1845 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1846 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1847 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1848 if (x) { 1849 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1850 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1851 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1852 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1853 } 1854 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1855 /* remove zeroed rows of off diagonal matrix */ 1856 for (i = 0; i < len; ++i) { 1857 row = lrows[i]; 1858 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1859 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1860 for (k = 0; k < count; ++k) { 1861 aa[0] = 0.0; 1862 aa += bs; 1863 } 1864 } 1865 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1866 for (i = 0; i < l->B->rmap->N; ++i) { 1867 row = i/bs; 1868 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1869 for (k = 0; k < bs; ++k) { 1870 col = bs*baij->j[j] + k; 1871 if (PetscAbsScalar(mask[col])) { 1872 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1873 if (b) bb[i] -= aa[0]*xx[col]; 1874 aa[0] = 0.0; 1875 } 1876 } 1877 } 1878 } 1879 if (x) { 1880 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1881 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1882 } 1883 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1884 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1885 ierr = PetscFree(lrows);CHKERRQ(ierr); 1886 1887 /* only change matrix nonzero state if pattern was allowed to be changed */ 1888 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1889 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1890 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1891 } 1892 PetscFunctionReturn(0); 1893 } 1894 1895 #undef __FUNCT__ 1896 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1897 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1898 { 1899 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1900 PetscErrorCode ierr; 1901 1902 PetscFunctionBegin; 1903 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1904 PetscFunctionReturn(0); 1905 } 1906 1907 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1908 1909 #undef __FUNCT__ 1910 #define __FUNCT__ "MatEqual_MPIBAIJ" 1911 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1912 { 1913 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1914 Mat a,b,c,d; 1915 PetscBool flg; 1916 PetscErrorCode ierr; 1917 1918 PetscFunctionBegin; 1919 a = matA->A; b = matA->B; 1920 c = matB->A; d = matB->B; 1921 1922 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1923 if (flg) { 1924 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1925 } 1926 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1927 PetscFunctionReturn(0); 1928 } 1929 1930 #undef __FUNCT__ 1931 #define __FUNCT__ "MatCopy_MPIBAIJ" 1932 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1933 { 1934 PetscErrorCode ierr; 1935 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 
1936 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1937 1938 PetscFunctionBegin; 1939 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1940 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1941 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1942 } else { 1943 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1944 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1945 } 1946 PetscFunctionReturn(0); 1947 } 1948 1949 #undef __FUNCT__ 1950 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1951 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1952 { 1953 PetscErrorCode ierr; 1954 1955 PetscFunctionBegin; 1956 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ" 1962 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1963 { 1964 PetscErrorCode ierr; 1965 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1966 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1967 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1968 1969 PetscFunctionBegin; 1970 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 1971 PetscFunctionReturn(0); 1972 } 1973 1974 #undef __FUNCT__ 1975 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1976 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1977 { 1978 PetscErrorCode ierr; 1979 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1980 PetscBLASInt bnz,one=1; 1981 Mat_SeqBAIJ *x,*y; 1982 1983 PetscFunctionBegin; 1984 if (str == SAME_NONZERO_PATTERN) { 1985 PetscScalar alpha = a; 1986 x = (Mat_SeqBAIJ*)xx->A->data; 1987 y = (Mat_SeqBAIJ*)yy->A->data; 1988 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1989 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1990 x = (Mat_SeqBAIJ*)xx->B->data; 1991 y = (Mat_SeqBAIJ*)yy->B->data; 1992 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1993 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1994 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1995 } else { 1996 Mat B; 1997 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1998 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1999 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2000 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2001 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2002 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2003 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2004 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 2005 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2006 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2007 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2008 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 2009 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2010 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2011 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2012 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2013 } 2014 PetscFunctionReturn(0); 2015 } 2016 2017 #undef __FUNCT__ 2018 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2019 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2020 { 2021 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2022 PetscErrorCode ierr; 2023 2024 PetscFunctionBegin; 2025 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2026 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2027 PetscFunctionReturn(0); 2028 } 2029 2030 #undef __FUNCT__ 2031 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2032 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2033 { 2034 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2035 PetscErrorCode ierr; 2036 2037 PetscFunctionBegin; 2038 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2039 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2040 PetscFunctionReturn(0); 2041 } 2042 2043 #undef __FUNCT__ 2044 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2045 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2046 { 2047 PetscErrorCode ierr; 2048 IS iscol_local; 2049 PetscInt csize; 2050 2051 PetscFunctionBegin; 2052 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2053 if (call == MAT_REUSE_MATRIX) { 2054 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2055 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2056 } else { 2057 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2058 } 2059 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2060 if (call == MAT_INITIAL_MATRIX) { 2061 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2062 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2063 } 2064 PetscFunctionReturn(0); 2065 } 2066 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2067 #undef __FUNCT__ 2068 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2069 /* 2070 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2071 in local and then by concatenating the local matrices the end result. 2072 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2073 */ 2074 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2075 { 2076 PetscErrorCode ierr; 2077 PetscMPIInt rank,size; 2078 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2079 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2080 Mat M,Mreuse; 2081 MatScalar *vwork,*aa; 2082 MPI_Comm comm; 2083 IS isrow_new, iscol_new; 2084 PetscBool idflag,allrows, allcols; 2085 Mat_SeqBAIJ *aij; 2086 2087 PetscFunctionBegin; 2088 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2089 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2090 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2091 /* The compression and expansion should be avoided. 
Doesn't point 2092 out errors, might change the indices, hence buggy */ 2093 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2094 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2095 2096 /* Check for special case: each processor gets entire matrix columns */ 2097 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2098 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2099 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 2100 else allcols = PETSC_FALSE; 2101 2102 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2103 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2104 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 2105 else allrows = PETSC_FALSE; 2106 2107 if (call == MAT_REUSE_MATRIX) { 2108 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2109 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2110 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2111 } else { 2112 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2113 } 2114 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2115 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2116 /* 2117 m - number of local rows 2118 n - number of columns (same on all processors) 2119 rstart - first row in new global matrix generated 2120 */ 2121 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2122 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2123 m = m/bs; 2124 n = n/bs; 2125 2126 if (call == MAT_INITIAL_MATRIX) { 2127 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2128 ii = aij->i; 2129 jj = aij->j; 2130 2131 /* 2132 Determine the number of non-zeros in the diagonal and off-diagonal 2133 portions of the matrix in order to do correct preallocation 2134 */ 2135 2136 /* first get start and end of "diagonal" columns */ 2137 if (csize == PETSC_DECIDE) { 2138 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2139 if (mglobal == n*bs) { /* square matrix */ 2140 nlocal = m; 2141 } else { 2142 nlocal = n/size + ((n % size) > rank); 2143 } 2144 } else { 2145 nlocal = csize/bs; 2146 } 2147 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2148 rstart = rend - nlocal; 2149 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2150 2151 /* next, compute all the lengths */ 2152 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2153 for (i=0; i<m; i++) { 2154 jend = ii[i+1] - ii[i]; 2155 olen = 0; 2156 dlen = 0; 2157 for (j=0; j<jend; j++) { 2158 if (*jj < rstart || *jj >= rend) olen++; 2159 else dlen++; 2160 jj++; 2161 } 2162 olens[i] = olen; 2163 dlens[i] = dlen; 2164 } 2165 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2166 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2167 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2168 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2169 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2170 } else { 2171 PetscInt ml,nl; 2172 2173 M = *newmat; 2174 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2175 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2176 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2177
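/* MAT_REUSE_MATRIX: discard the old values but keep the existing nonzero pattern of M */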
/* 2178 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2179 rather than the slower MatSetValues(). 2180 */ 2181 M->was_assembled = PETSC_TRUE; 2182 M->assembled = PETSC_FALSE; 2183 } 2184 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2185 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2186 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2187 ii = aij->i; 2188 jj = aij->j; 2189 aa = aij->a; 2190 for (i=0; i<m; i++) { 2191 row = rstart/bs + i; 2192 nz = ii[i+1] - ii[i]; 2193 cwork = jj; jj += nz; 2194 vwork = aa; aa += nz*bs*bs; 2195 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2196 } 2197 2198 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2199 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2200 *newmat = M; 2201 2202 /* save submatrix used in processor for next request */ 2203 if (call == MAT_INITIAL_MATRIX) { 2204 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2205 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2206 } 2207 PetscFunctionReturn(0); 2208 } 2209 2210 #undef __FUNCT__ 2211 #define __FUNCT__ "MatPermute_MPIBAIJ" 2212 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2213 { 2214 MPI_Comm comm,pcomm; 2215 PetscInt clocal_size,nrows; 2216 const PetscInt *rows; 2217 PetscMPIInt size; 2218 IS crowp,lcolp; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2223 /* make a collective version of 'rowp' */ 2224 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2225 if (pcomm==comm) { 2226 crowp = rowp; 2227 } else { 2228 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2229 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2230 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2231 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2232 } 2233 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2234 /* make a local version of 'colp' */ 2235 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2236 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2237 if (size==1) { 2238 lcolp = colp; 2239 } else { 2240 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2241 } 2242 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2243 /* now we just get the submatrix */ 2244 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2245 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2246 /* clean up */ 2247 if (pcomm!=comm) { 2248 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2249 } 2250 if (size>1) { 2251 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2252 } 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2258 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2259 { 2260 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2261 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2262 2263 PetscFunctionBegin; 2264 if (nghosts) *nghosts = B->nbs; 2265 if (ghosts) *ghosts = baij->garray; 2266 PetscFunctionReturn(0); 2267 } 2268 2269 #undef __FUNCT__ 2270 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2271 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2272 { 2273 Mat B; 2274 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2275 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2276 Mat_SeqAIJ *b; 2277 PetscErrorCode ierr; 2278 
PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2279 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2280 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2281 2282 PetscFunctionBegin; 2283 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2284 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2285 2286 /* ---------------------------------------------------------------- 2287 Tell every processor the number of nonzeros per row 2288 */ 2289 ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr); 2290 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2291 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2292 } 2293 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2294 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2295 displs = recvcounts + size; 2296 for (i=0; i<size; i++) { 2297 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2298 displs[i] = A->rmap->range[i]/bs; 2299 } 2300 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2301 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2302 #else 2303 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2304 #endif 2305 /* --------------------------------------------------------------- 2306 Create the sequential matrix of the same type as the local block diagonal 2307 */ 2308 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2309 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2310 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2311 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2312 b = (Mat_SeqAIJ*)B->data; 2313 2314 /*-------------------------------------------------------------------- 2315 Copy my part of matrix column indices over 2316 */ 2317 sendcount = ad->nz + bd->nz; 2318 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2319 a_jsendbuf = ad->j; 2320 b_jsendbuf = bd->j; 2321 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2322 cnt = 0; 2323 for (i=0; i<n; i++) { 2324 2325 /* put in lower diagonal portion */ 2326 m = bd->i[i+1] - bd->i[i]; 2327 while (m > 0) { 2328 /* is it above diagonal (in bd (compressed) numbering) */ 2329 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2330 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2331 m--; 2332 } 2333 2334 /* put in diagonal portion */ 2335 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2336 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2337 } 2338 2339 /* put in upper diagonal portion */ 2340 while (m-- > 0) { 2341 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2342 } 2343 } 2344 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2345 2346 /*-------------------------------------------------------------------- 2347 Gather all column indices to all processors 2348 */ 2349 for (i=0; i<size; i++) { 2350 recvcounts[i] = 0; 2351 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2352 recvcounts[i] += lens[j]; 2353 } 2354 } 2355 displs[0] = 0; 2356 for (i=1; i<size; i++) { 2357 displs[i] = displs[i-1] + recvcounts[i-1]; 2358 } 2359 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2360 ierr = 
MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2361 #else 2362 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2363 #endif 2364 /*-------------------------------------------------------------------- 2365 Assemble the matrix into useable form (note numerical values not yet set) 2366 */ 2367 /* set the b->ilen (length of each row) values */ 2368 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2369 /* set the b->i indices */ 2370 b->i[0] = 0; 2371 for (i=1; i<=A->rmap->N/bs; i++) { 2372 b->i[i] = b->i[i-1] + lens[i-1]; 2373 } 2374 ierr = PetscFree(lens);CHKERRQ(ierr); 2375 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2376 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2377 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2378 2379 if (A->symmetric) { 2380 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2381 } else if (A->hermitian) { 2382 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2383 } else if (A->structurally_symmetric) { 2384 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2385 } 2386 *newmat = B; 2387 PetscFunctionReturn(0); 2388 } 2389 2390 #undef __FUNCT__ 2391 #define __FUNCT__ "MatSOR_MPIBAIJ" 2392 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2393 { 2394 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2395 PetscErrorCode ierr; 2396 Vec bb1 = 0; 2397 2398 PetscFunctionBegin; 2399 if (flag == SOR_APPLY_UPPER) { 2400 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2401 PetscFunctionReturn(0); 2402 } 2403 2404 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2405 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2406 } 2407 2408 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2409 if (flag & SOR_ZERO_INITIAL_GUESS) { 2410 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2411 its--; 2412 } 2413 2414 while (its--) { 2415 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2416 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2417 2418 /* update rhs: bb1 = bb - B*x */ 2419 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2420 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2421 2422 /* local sweep */ 2423 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2424 } 2425 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2426 if (flag & SOR_ZERO_INITIAL_GUESS) { 2427 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2428 its--; 2429 } 2430 while (its--) { 2431 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2432 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2433 2434 /* update rhs: bb1 = bb - B*x */ 2435 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2436 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2437 2438 /* local sweep */ 2439 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2440 } 2441 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2442 if (flag & SOR_ZERO_INITIAL_GUESS) { 2443 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2444 its--; 2445 } 2446 while (its--) { 2447 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2448 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2449 2450 /* update rhs: bb1 = bb - B*x */ 2451 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2452 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2453 2454 /* local sweep */ 2455 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2456 } 2457 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2458 2459 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2460 PetscFunctionReturn(0); 2461 } 2462 2463 #undef __FUNCT__ 2464 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 2465 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2466 { 2467 PetscErrorCode ierr; 2468 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2469 PetscInt N,i,*garray = aij->garray; 2470 PetscInt ib,jb,bs = A->rmap->bs; 2471 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2472 MatScalar *a_val = a_aij->a; 2473 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2474 MatScalar *b_val = b_aij->a; 2475 PetscReal *work; 2476 2477 PetscFunctionBegin; 2478 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2479 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2480 if (type == NORM_2) { 2481 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2482 for (jb=0; jb<bs; jb++) { 2483 for (ib=0; ib<bs; ib++) { 2484 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2485 a_val++; 2486 } 2487 } 2488 } 2489 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2490 for (jb=0; jb<bs; jb++) { 2491 for (ib=0; ib<bs; ib++) { 2492 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2493 b_val++; 2494 } 2495 } 2496 } 2497 } else if (type == NORM_1) { 2498 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2499 for (jb=0; jb<bs; jb++) { 2500 for (ib=0; ib<bs; ib++) { 2501 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2502 a_val++; 2503 } 2504 } 2505 } 2506 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2507 for (jb=0; jb<bs; jb++) { 2508 for (ib=0; ib<bs; ib++) { 2509 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2510 b_val++; 2511 } 2512 } 2513 } 2514 } else if (type == NORM_INFINITY) { 2515 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2516 for (jb=0; jb<bs; jb++) { 2517 for (ib=0; ib<bs; ib++) { 2518 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2519 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2520 a_val++; 2521 } 2522 } 2523 } 2524 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2525 for (jb=0; jb<bs; jb++) { 2526 for (ib=0; ib<bs; ib++) { 2527 int col = garray[b_aij->j[i]] * bs + jb; 2528 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2529 b_val++; 2530 } 2531 } 2532 } 2533 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2534 if (type == NORM_INFINITY) { 2535 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2536 } else { 2537 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2538 } 2539 ierr = PetscFree(work);CHKERRQ(ierr); 2540 if (type == NORM_2) { 2541 for (i=0; i<N; i++) norms[i] = 
PetscSqrtReal(norms[i]); 2542 } 2543 PetscFunctionReturn(0); 2544 } 2545 2546 #undef __FUNCT__ 2547 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2548 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2549 { 2550 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2551 PetscErrorCode ierr; 2552 2553 PetscFunctionBegin; 2554 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2555 PetscFunctionReturn(0); 2556 } 2557 2558 2559 /* -------------------------------------------------------------------*/ 2560 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2561 MatGetRow_MPIBAIJ, 2562 MatRestoreRow_MPIBAIJ, 2563 MatMult_MPIBAIJ, 2564 /* 4*/ MatMultAdd_MPIBAIJ, 2565 MatMultTranspose_MPIBAIJ, 2566 MatMultTransposeAdd_MPIBAIJ, 2567 0, 2568 0, 2569 0, 2570 /*10*/ 0, 2571 0, 2572 0, 2573 MatSOR_MPIBAIJ, 2574 MatTranspose_MPIBAIJ, 2575 /*15*/ MatGetInfo_MPIBAIJ, 2576 MatEqual_MPIBAIJ, 2577 MatGetDiagonal_MPIBAIJ, 2578 MatDiagonalScale_MPIBAIJ, 2579 MatNorm_MPIBAIJ, 2580 /*20*/ MatAssemblyBegin_MPIBAIJ, 2581 MatAssemblyEnd_MPIBAIJ, 2582 MatSetOption_MPIBAIJ, 2583 MatZeroEntries_MPIBAIJ, 2584 /*24*/ MatZeroRows_MPIBAIJ, 2585 0, 2586 0, 2587 0, 2588 0, 2589 /*29*/ MatSetUp_MPIBAIJ, 2590 0, 2591 0, 2592 0, 2593 0, 2594 /*34*/ MatDuplicate_MPIBAIJ, 2595 0, 2596 0, 2597 0, 2598 0, 2599 /*39*/ MatAXPY_MPIBAIJ, 2600 MatGetSubMatrices_MPIBAIJ, 2601 MatIncreaseOverlap_MPIBAIJ, 2602 MatGetValues_MPIBAIJ, 2603 MatCopy_MPIBAIJ, 2604 /*44*/ 0, 2605 MatScale_MPIBAIJ, 2606 0, 2607 0, 2608 MatZeroRowsColumns_MPIBAIJ, 2609 /*49*/ 0, 2610 0, 2611 0, 2612 0, 2613 0, 2614 /*54*/ MatFDColoringCreate_MPIXAIJ, 2615 0, 2616 MatSetUnfactored_MPIBAIJ, 2617 MatPermute_MPIBAIJ, 2618 MatSetValuesBlocked_MPIBAIJ, 2619 /*59*/ MatGetSubMatrix_MPIBAIJ, 2620 MatDestroy_MPIBAIJ, 2621 MatView_MPIBAIJ, 2622 0, 2623 0, 2624 /*64*/ 0, 2625 0, 2626 0, 2627 0, 2628 0, 2629 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /*74*/ 0, 2635 MatFDColoringApply_BAIJ, 2636 0, 2637 0, 2638 0, 2639 /*79*/ 0, 2640 0, 2641 0, 2642 0, 2643 MatLoad_MPIBAIJ, 2644 /*84*/ 0, 2645 0, 2646 0, 2647 0, 2648 0, 2649 /*89*/ 0, 2650 0, 2651 0, 2652 0, 2653 0, 2654 /*94*/ 0, 2655 0, 2656 0, 2657 0, 2658 0, 2659 /*99*/ 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*104*/0, 2665 MatRealPart_MPIBAIJ, 2666 MatImaginaryPart_MPIBAIJ, 2667 0, 2668 0, 2669 /*109*/0, 2670 0, 2671 0, 2672 0, 2673 0, 2674 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2675 0, 2676 MatGetGhosts_MPIBAIJ, 2677 0, 2678 0, 2679 /*119*/0, 2680 0, 2681 0, 2682 0, 2683 MatGetMultiProcBlock_MPIBAIJ, 2684 /*124*/0, 2685 MatGetColumnNorms_MPIBAIJ, 2686 MatInvertBlockDiagonal_MPIBAIJ, 2687 0, 2688 0, 2689 /*129*/ 0, 2690 0, 2691 0, 2692 0, 2693 0, 2694 /*134*/ 0, 2695 0, 2696 0, 2697 0, 2698 0, 2699 /*139*/ 0, 2700 0, 2701 0, 2702 MatFDColoringSetUp_MPIXAIJ 2703 }; 2704 2705 #undef __FUNCT__ 2706 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2707 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2708 { 2709 PetscFunctionBegin; 2710 *a = ((Mat_MPIBAIJ*)A->data)->A; 2711 PetscFunctionReturn(0); 2712 } 2713 2714 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2715 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2718 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2719 { 2720 PetscInt m,rstart,cstart,cend; 2721 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2722 const PetscInt *JJ =0; 2723 
PetscScalar *values=0; 2724 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2725 PetscErrorCode ierr; 2726 2727 PetscFunctionBegin; 2728 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2729 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2730 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2731 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2732 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2733 m = B->rmap->n/bs; 2734 rstart = B->rmap->rstart/bs; 2735 cstart = B->cmap->rstart/bs; 2736 cend = B->cmap->rend/bs; 2737 2738 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2739 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2740 for (i=0; i<m; i++) { 2741 nz = ii[i+1] - ii[i]; 2742 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2743 nz_max = PetscMax(nz_max,nz); 2744 JJ = jj + ii[i]; 2745 for (j=0; j<nz; j++) { 2746 if (*JJ >= cstart) break; 2747 JJ++; 2748 } 2749 d = 0; 2750 for (; j<nz; j++) { 2751 if (*JJ++ >= cend) break; 2752 d++; 2753 } 2754 d_nnz[i] = d; 2755 o_nnz[i] = nz - d; 2756 } 2757 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2758 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2759 2760 values = (PetscScalar*)V; 2761 if (!values) { 2762 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2763 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2764 } 2765 for (i=0; i<m; i++) { 2766 PetscInt row = i + rstart; 2767 PetscInt ncols = ii[i+1] - ii[i]; 2768 const PetscInt *icols = jj + ii[i]; 2769 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2770 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2771 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2772 } else { /* block ordering does not match so we can only insert one block at a time. */ 2773 PetscInt j; 2774 for (j=0; j<ncols; j++) { 2775 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2776 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2777 } 2778 } 2779 } 2780 2781 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2782 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2783 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2784 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2785 PetscFunctionReturn(0); 2786 } 2787 2788 #undef __FUNCT__ 2789 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2790 /*@C 2791 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2792 (the default parallel PETSc format). 2793 2794 Collective on MPI_Comm 2795 2796 Input Parameters: 2797 + B - the matrix 2798 . bs - the block size 2799 . i - the indices into j for the start of each local row (starts with zero) 2800 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2801 - v - optional values in the matrix 2802 2803 Level: developer 2804 2805 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2806 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2807 over rows within a block and the last index is over columns within a block row. 
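In flat-array terms, with this default row-oriented ordering, the entry for row r and column c of the k-th block would be addressed as (an illustrative sketch, not an additional API):
.vb
      v[bs*bs*k + bs*r + c]
.ve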
Fortran programs will likely set 2808 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2809 block column and the second index is over columns within a block. 2810 2811 .keywords: matrix, aij, compressed row, sparse, parallel 2812 2813 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2814 @*/ 2815 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2816 { 2817 PetscErrorCode ierr; 2818 2819 PetscFunctionBegin; 2820 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2821 PetscValidType(B,1); 2822 PetscValidLogicalCollectiveInt(B,bs,2); 2823 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2824 PetscFunctionReturn(0); 2825 } 2826 2827 #undef __FUNCT__ 2828 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2829 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2830 { 2831 Mat_MPIBAIJ *b; 2832 PetscErrorCode ierr; 2833 PetscInt i; 2834 2835 PetscFunctionBegin; 2836 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2837 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2838 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2839 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2840 2841 if (d_nnz) { 2842 for (i=0; i<B->rmap->n/bs; i++) { 2843 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2844 } 2845 } 2846 if (o_nnz) { 2847 for (i=0; i<B->rmap->n/bs; i++) { 2848 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2849 } 2850 } 2851 2852 b = (Mat_MPIBAIJ*)B->data; 2853 b->bs2 = bs*bs; 2854 b->mbs = B->rmap->n/bs; 2855 b->nbs = B->cmap->n/bs; 2856 b->Mbs = B->rmap->N/bs; 2857 b->Nbs = B->cmap->N/bs; 2858 2859 for (i=0; i<=b->size; i++) { 2860 b->rangebs[i] = B->rmap->range[i]/bs; 2861 } 2862 b->rstartbs = B->rmap->rstart/bs; 2863 b->rendbs = B->rmap->rend/bs; 2864 b->cstartbs = B->cmap->rstart/bs; 2865 b->cendbs = B->cmap->rend/bs; 2866 2867 if (!B->preallocated) { 2868 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2869 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2870 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2871 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2872 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2873 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2874 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2875 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2876 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2877 } 2878 2879 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2880 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2881 B->preallocated = PETSC_TRUE; 2882 PetscFunctionReturn(0); 2883 } 2884 2885 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2886 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2887 2888 #undef __FUNCT__ 2889 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2890 PETSC_EXTERN 
PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2891 { 2892 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2893 PetscErrorCode ierr; 2894 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2895 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2896 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2897 2898 PetscFunctionBegin; 2899 ierr = PetscMalloc1((M+1),&ii);CHKERRQ(ierr); 2900 ii[0] = 0; 2901 for (i=0; i<M; i++) { 2902 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2903 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2904 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2905 /* remove one from the count if the matrix has a diagonal entry in this row */ 2906 for (j=id[i]; j<id[i+1]; j++) { 2907 if (jd[j] == i) {ii[i+1]--;break;} 2908 } 2909 } 2910 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2911 cnt = 0; 2912 for (i=0; i<M; i++) { 2913 for (j=io[i]; j<io[i+1]; j++) { 2914 if (garray[jo[j]] > rstart) break; 2915 jj[cnt++] = garray[jo[j]]; 2916 } 2917 for (k=id[i]; k<id[i+1]; k++) { 2918 if (jd[k] != i) { 2919 jj[cnt++] = rstart + jd[k]; 2920 } 2921 } 2922 for (; j<io[i+1]; j++) { 2923 jj[cnt++] = garray[jo[j]]; 2924 } 2925 } 2926 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2927 PetscFunctionReturn(0); 2928 } 2929 2930 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2931 2932 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2933 2934 #undef __FUNCT__ 2935 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 2936 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2937 { 2938 PetscErrorCode ierr; 2939 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2940 Mat B; 2941 Mat_MPIAIJ *b; 2942 2943 PetscFunctionBegin; 2944 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2945 2946 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2947 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2948 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2949 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2950 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2951 b = (Mat_MPIAIJ*) B->data; 2952 2953 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2954 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2955 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2956 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2957 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2958 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2959 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2960 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2961 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2962 if (reuse == MAT_REUSE_MATRIX) { 2963 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2964 } else { 2965 *newmat = B; 2966 } 2967 PetscFunctionReturn(0); 2968 } 2969 2970 #if defined(PETSC_HAVE_MUMPS) 2971 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2972 #endif 2973 2974 /*MC 2975 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
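  A typical construction sequence is (a minimal sketch; the preallocation arguments are illustrative):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
.ve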
2976 2977 Options Database Keys: 2978 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2979 . -mat_block_size <bs> - set the blocksize used to store the matrix 2980 - -mat_use_hash_table <fact> 2981 2982 Level: beginner 2983 2984 .seealso: MatCreateMPIBAIJ 2985 M*/ 2986 2987 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2988 2989 #undef __FUNCT__ 2990 #define __FUNCT__ "MatCreate_MPIBAIJ" 2991 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2992 { 2993 Mat_MPIBAIJ *b; 2994 PetscErrorCode ierr; 2995 PetscBool flg; 2996 2997 PetscFunctionBegin; 2998 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2999 B->data = (void*)b; 3000 3001 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3002 B->assembled = PETSC_FALSE; 3003 3004 B->insertmode = NOT_SET_VALUES; 3005 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3006 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3007 3008 /* build local table of row and column ownerships */ 3009 ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr); 3010 3011 /* build cache for off array entries formed */ 3012 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3013 3014 b->donotstash = PETSC_FALSE; 3015 b->colmap = NULL; 3016 b->garray = NULL; 3017 b->roworiented = PETSC_TRUE; 3018 3019 /* stuff used in block assembly */ 3020 b->barray = 0; 3021 3022 /* stuff used for matrix vector multiply */ 3023 b->lvec = 0; 3024 b->Mvctx = 0; 3025 3026 /* stuff for MatGetRow() */ 3027 b->rowindices = 0; 3028 b->rowvalues = 0; 3029 b->getrowactive = PETSC_FALSE; 3030 3031 /* hash table stuff */ 3032 b->ht = 0; 3033 b->hd = 0; 3034 b->ht_size = 0; 3035 b->ht_flag = PETSC_FALSE; 3036 b->ht_fact = 0; 3037 b->ht_total_ct = 0; 3038 b->ht_insert_ct = 0; 3039 3040 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3041 b->ijonly = PETSC_FALSE; 3042 3043 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3044 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 3045 if (flg) { 3046 PetscReal fact = 1.39; 3047 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3048 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3049 if (fact <= 1.0) fact = 1.39; 3050 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3051 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3052 } 3053 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3054 3055 #if defined(PETSC_HAVE_MUMPS) 3056 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3057 #endif 3058 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3059 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3060 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3061 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3062 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3063 
ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3064 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3065 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3066 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3067 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3068 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3069 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3070 PetscFunctionReturn(0); 3071 } 3072 3073 /*MC 3074 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3075 3076 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3077 and MATMPIBAIJ otherwise. 3078 3079 Options Database Keys: 3080 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3081 3082 Level: beginner 3083 3084 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3085 M*/ 3086 3087 #undef __FUNCT__ 3088 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3089 /*@C 3090 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3091 (block compressed row). For good matrix assembly performance 3092 the user should preallocate the matrix storage by setting the parameters 3093 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3094 performance can be increased by more than a factor of 50. 3095 3096 Collective on Mat 3097 3098 Input Parameters: 3099 + B - the matrix 3100 . bs - size of block 3101 . d_nz - number of block nonzeros per block row in diagonal portion of local 3102 submatrix (same for all local rows) 3103 . d_nnz - array containing the number of block nonzeros in the various block rows 3104 in the diagonal portion of the local submatrix (possibly different for each block 3105 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3106 set it even if it is zero. 3107 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3108 submatrix (same for all local rows). 3109 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3110 off-diagonal portion of the local submatrix (possibly different for 3111 each block row) or NULL. 3112 3113 If the *_nnz parameter is given then the *_nz parameter is ignored 3114 3115 Options Database Keys: 3116 + -mat_block_size - size of the blocks to use 3117 - -mat_use_hash_table <fact> - use a hash table to save memory when assembling the matrix; <fact> sets the hash table size factor 3118 3119 Notes: 3120 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3121 then it must be used on all processors that share the object for that argument. 3122 3123 Storage Information: 3124 For a square global matrix we define each processor's diagonal portion 3125 to be its local rows and the corresponding columns (a square submatrix); 3126 each processor's off-diagonal portion encompasses the remainder of the 3127 local matrix (a rectangular submatrix).
3128 3129 The user can specify preallocated storage for the diagonal part of 3130 the local submatrix with either d_nz or d_nnz (not both). Set 3131 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3132 memory allocation. Likewise, specify preallocated storage for the 3133 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3134 3135 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3136 the figure below we depict these three local rows and all columns (0-11). 3137 3138 .vb 3139 0 1 2 3 4 5 6 7 8 9 10 11 3140 -------------------------- 3141 row 3 |o o o d d d o o o o o o 3142 row 4 |o o o d d d o o o o o o 3143 row 5 |o o o d d d o o o o o o 3144 -------------------------- 3145 .ve 3146 3147 Thus, any entries in the d locations are stored in the d (diagonal) 3148 submatrix, and any entries in the o locations are stored in the 3149 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3150 stored simply in the MATSEQBAIJ format for compressed row storage. 3151 3152 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3153 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3154 In general, for PDE problems in which most nonzeros are near the diagonal, 3155 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3156 or you will get TERRIBLE performance; see the users' manual chapter on 3157 matrices. 3158 3159 You can call MatGetInfo() to get information on how effective the preallocation was; 3160 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded. 3161 You can also run with the option -info and look for messages with the string 3162 malloc in them to see if additional memory allocation was needed. 3163 3164 Level: intermediate 3165 3166 .keywords: matrix, block, aij, compressed row, sparse, parallel 3167 3168 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3169 @*/ 3170 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3171 { 3172 PetscErrorCode ierr; 3173 3174 PetscFunctionBegin; 3175 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3176 PetscValidType(B,1); 3177 PetscValidLogicalCollectiveInt(B,bs,2); 3178 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3179 PetscFunctionReturn(0); 3180 } 3181 3182 #undef __FUNCT__ 3183 #define __FUNCT__ "MatCreateBAIJ" 3184 /*@C 3185 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3186 (block compressed row). For good matrix assembly performance 3187 the user should preallocate the matrix storage by setting the parameters 3188 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3189 performance can be increased by more than a factor of 50. 3190 3191 Collective on MPI_Comm 3192 3193 Input Parameters: 3194 + comm - MPI communicator 3195 . bs - size of block 3196 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3197 This value should be the same as the local size used in creating the 3198 y vector for the matrix-vector product y = Ax. 3199 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3200 This value should be the same as the local size used in creating the 3201 x vector for the matrix-vector product y = Ax. 3202 .
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3203 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3204 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3205 submatrix (same for all local rows) 3206 . d_nnz - array containing the number of nonzero blocks in the various block rows 3207 in the diagonal portion of the local submatrix (possibly different for each block 3208 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3209 and set it even if it is zero. 3210 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3211 submatrix (same for all local rows). 3212 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3213 off-diagonal portion of the local submatrix (possibly different for 3214 each block row) or NULL. 3215 3216 Output Parameter: 3217 . A - the matrix 3218 3219 Options Database Keys: 3220 + -mat_block_size - size of the blocks to use 3221 - -mat_use_hash_table <fact> - use a hash table to save memory when assembling the matrix; <fact> sets the hash table size factor 3222 3223 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3224 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3225 [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()] 3226 3227 Notes: 3228 If the *_nnz parameter is given then the *_nz parameter is ignored 3229 3230 A nonzero block is any block that has 1 or more nonzeros in it 3231 3232 The user MUST specify either the local or global matrix dimensions 3233 (possibly both). 3234 3235 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3236 then it must be used on all processors that share the object for that argument. 3237 3238 Storage Information: 3239 For a square global matrix we define each processor's diagonal portion 3240 to be its local rows and the corresponding columns (a square submatrix); 3241 each processor's off-diagonal portion encompasses the remainder of the 3242 local matrix (a rectangular submatrix). 3243 3244 The user can specify preallocated storage for the diagonal part of 3245 the local submatrix with either d_nz or d_nnz (not both). Set 3246 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3247 memory allocation. Likewise, specify preallocated storage for the 3248 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3249 3250 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3251 the figure below we depict these three local rows and all columns (0-11). 3252 3253 .vb 3254 0 1 2 3 4 5 6 7 8 9 10 11 3255 -------------------------- 3256 row 3 |o o o d d d o o o o o o 3257 row 4 |o o o d d d o o o o o o 3258 row 5 |o o o d d d o o o o o o 3259 -------------------------- 3260 .ve 3261 3262 Thus, any entries in the d locations are stored in the d (diagonal) 3263 submatrix, and any entries in the o locations are stored in the 3264 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3265 stored simply in the MATSEQBAIJ format for compressed row storage. 3266 3267 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3268 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3269 In general, for PDE problems in which most nonzeros are near the diagonal, 3270 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3271 or you will get TERRIBLE performance; see the users' manual chapter on 3272 matrices.
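   Example of a typical call sequence (a minimal sketch, not taken from the PETSc examples; the
   communicator, block size, global sizes and per-block-row nonzero estimates used here are
   illustrative assumptions only):
.vb
      Mat            A;
      PetscErrorCode ierr;
      PetscInt       bs = 2, M = 128, N = 128;
      ierr = MatCreateBAIJ(PETSC_COMM_WORLD,bs,PETSC_DECIDE,PETSC_DECIDE,M,N,5,NULL,2,NULL,&A);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
   Here 5 and 2 are the estimated numbers of block nonzeros per block row in the diagonal and
   off-diagonal portions; block values would normally be inserted with MatSetValuesBlocked()
   between the creation and the assembly calls.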
3273 3274 Level: intermediate 3275 3276 .keywords: matrix, block, aij, compressed row, sparse, parallel 3277 3278 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3279 @*/ 3280 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3281 { 3282 PetscErrorCode ierr; 3283 PetscMPIInt size; 3284 3285 PetscFunctionBegin; 3286 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3287 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3288 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3289 if (size > 1) { 3290 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3291 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3292 } else { 3293 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3294 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3295 } 3296 PetscFunctionReturn(0); 3297 } 3298 3299 #undef __FUNCT__ 3300 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3301 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3302 { 3303 Mat mat; 3304 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3305 PetscErrorCode ierr; 3306 PetscInt len=0; 3307 3308 PetscFunctionBegin; 3309 *newmat = 0; 3310 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3311 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3312 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3313 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3314 3315 mat->factortype = matin->factortype; 3316 mat->preallocated = PETSC_TRUE; 3317 mat->assembled = PETSC_TRUE; 3318 mat->insertmode = NOT_SET_VALUES; 3319 3320 a = (Mat_MPIBAIJ*)mat->data; 3321 mat->rmap->bs = matin->rmap->bs; 3322 a->bs2 = oldmat->bs2; 3323 a->mbs = oldmat->mbs; 3324 a->nbs = oldmat->nbs; 3325 a->Mbs = oldmat->Mbs; 3326 a->Nbs = oldmat->Nbs; 3327 3328 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3329 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3330 3331 a->size = oldmat->size; 3332 a->rank = oldmat->rank; 3333 a->donotstash = oldmat->donotstash; 3334 a->roworiented = oldmat->roworiented; 3335 a->rowindices = 0; 3336 a->rowvalues = 0; 3337 a->getrowactive = PETSC_FALSE; 3338 a->barray = 0; 3339 a->rstartbs = oldmat->rstartbs; 3340 a->rendbs = oldmat->rendbs; 3341 a->cstartbs = oldmat->cstartbs; 3342 a->cendbs = oldmat->cendbs; 3343 3344 /* hash table stuff */ 3345 a->ht = 0; 3346 a->hd = 0; 3347 a->ht_size = 0; 3348 a->ht_flag = oldmat->ht_flag; 3349 a->ht_fact = oldmat->ht_fact; 3350 a->ht_total_ct = 0; 3351 a->ht_insert_ct = 0; 3352 3353 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3354 if (oldmat->colmap) { 3355 #if defined(PETSC_USE_CTABLE) 3356 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3357 #else 3358 ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr); 3359 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3360 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3361 #endif 3362 } else a->colmap = 0; 3363 3364 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3365 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3366 ierr = 
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3367 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3368 } else a->garray = 0; 3369 3370 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3371 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3372 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3373 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3374 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3375 3376 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3377 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3378 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3379 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3380 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3381 *newmat = mat; 3382 PetscFunctionReturn(0); 3383 } 3384 3385 #undef __FUNCT__ 3386 #define __FUNCT__ "MatLoad_MPIBAIJ" 3387 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3388 { 3389 PetscErrorCode ierr; 3390 int fd; 3391 PetscInt i,nz,j,rstart,rend; 3392 PetscScalar *vals,*buf; 3393 MPI_Comm comm; 3394 MPI_Status status; 3395 PetscMPIInt rank,size,maxnz; 3396 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3397 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3398 PetscInt jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax; 3399 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3400 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3401 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3402 3403 PetscFunctionBegin; 3404 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3405 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3406 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3407 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3408 if (bs < 0) bs = 1; 3409 3410 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3411 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3412 if (!rank) { 3413 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3414 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3415 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3416 } 3417 3418 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3419 3420 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3421 M = header[1]; N = header[2]; 3422 3423 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3424 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3425 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3426 3427 /* If global sizes are set, check if they are consistent with that given in the file */ 3428 if (sizesset) { 3429 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3430 } 3431 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3432 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, 
"Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3433 3434 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3435 3436 /* 3437 This code adds extra rows to make sure the number of rows is 3438 divisible by the blocksize 3439 */ 3440 Mbs = M/bs; 3441 extra_rows = bs - M + bs*Mbs; 3442 if (extra_rows == bs) extra_rows = 0; 3443 else Mbs++; 3444 if (extra_rows && !rank) { 3445 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3446 } 3447 3448 /* determine ownership of all rows */ 3449 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3450 mbs = Mbs/size + ((Mbs % size) > rank); 3451 m = mbs*bs; 3452 } else { /* User set */ 3453 m = newmat->rmap->n; 3454 mbs = m/bs; 3455 } 3456 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 3457 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3458 3459 /* process 0 needs enough room for process with most rows */ 3460 if (!rank) { 3461 mmax = rowners[1]; 3462 for (i=2; i<=size; i++) { 3463 mmax = PetscMax(mmax,rowners[i]); 3464 } 3465 mmax*=bs; 3466 } else mmax = -1; /* unused, but compiler warns anyway */ 3467 3468 rowners[0] = 0; 3469 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3470 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3471 rstart = rowners[rank]; 3472 rend = rowners[rank+1]; 3473 3474 /* distribute row lengths to all processors */ 3475 ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr); 3476 if (!rank) { 3477 mend = m; 3478 if (size == 1) mend = mend - extra_rows; 3479 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3480 for (j=mend; j<m; j++) locrowlens[j] = 1; 3481 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3482 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3483 for (j=0; j<m; j++) { 3484 procsnz[0] += locrowlens[j]; 3485 } 3486 for (i=1; i<size; i++) { 3487 mend = browners[i+1] - browners[i]; 3488 if (i == size-1) mend = mend - extra_rows; 3489 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3490 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3491 /* calculate the number of nonzeros on each processor */ 3492 for (j=0; j<browners[i+1]-browners[i]; j++) { 3493 procsnz[i] += rowlengths[j]; 3494 } 3495 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3496 } 3497 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3498 } else { 3499 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3500 } 3501 3502 if (!rank) { 3503 /* determine max buffer needed and allocate it */ 3504 maxnz = procsnz[0]; 3505 for (i=1; i<size; i++) { 3506 maxnz = PetscMax(maxnz,procsnz[i]); 3507 } 3508 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3509 3510 /* read in my part of the matrix column indices */ 3511 nz = procsnz[0]; 3512 ierr = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr); 3513 mycols = ibuf; 3514 if (size == 1) nz -= extra_rows; 3515 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3516 if (size == 1) { 3517 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3518 } 3519 3520 /* read in every ones (except the last) and ship off */ 3521 for (i=1; i<size-1; i++) { 3522 nz = procsnz[i]; 3523 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3524 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3525 } 3526 /* read in the stuff for the last proc */ 3527 if (size != 1) { 3528 nz = procsnz[size-1] - extra_rows; /* the extra rows are not 
on the disk */ 3529 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3530 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3531 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3532 } 3533 ierr = PetscFree(cols);CHKERRQ(ierr); 3534 } else { 3535 /* determine buffer space needed for message */ 3536 nz = 0; 3537 for (i=0; i<m; i++) { 3538 nz += locrowlens[i]; 3539 } 3540 ierr = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr); 3541 mycols = ibuf; 3542 /* receive message of column indices*/ 3543 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3544 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3545 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3546 } 3547 3548 /* loop over local rows, determining number of off diagonal entries */ 3549 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 3550 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 3551 rowcount = 0; nzcount = 0; 3552 for (i=0; i<mbs; i++) { 3553 dcount = 0; 3554 odcount = 0; 3555 for (j=0; j<bs; j++) { 3556 kmax = locrowlens[rowcount]; 3557 for (k=0; k<kmax; k++) { 3558 tmp = mycols[nzcount++]/bs; 3559 if (!mask[tmp]) { 3560 mask[tmp] = 1; 3561 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3562 else masked1[dcount++] = tmp; 3563 } 3564 } 3565 rowcount++; 3566 } 3567 3568 dlens[i] = dcount; 3569 odlens[i] = odcount; 3570 3571 /* zero out the mask elements we set */ 3572 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3573 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3574 } 3575 3576 3577 if (!sizesset) { 3578 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3579 } 3580 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3581 3582 if (!rank) { 3583 ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr); 3584 /* read in my part of the matrix numerical values */ 3585 nz = procsnz[0]; 3586 vals = buf; 3587 mycols = ibuf; 3588 if (size == 1) nz -= extra_rows; 3589 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3590 if (size == 1) { 3591 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3592 } 3593 3594 /* insert into matrix */ 3595 jj = rstart*bs; 3596 for (i=0; i<m; i++) { 3597 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3598 mycols += locrowlens[i]; 3599 vals += locrowlens[i]; 3600 jj++; 3601 } 3602 /* read in other processors (except the last one) and ship out */ 3603 for (i=1; i<size-1; i++) { 3604 nz = procsnz[i]; 3605 vals = buf; 3606 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3607 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3608 } 3609 /* the last proc */ 3610 if (size != 1) { 3611 nz = procsnz[i] - extra_rows; 3612 vals = buf; 3613 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3614 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3615 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3616 } 3617 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3618 } else { 3619 /* receive numeric values */ 3620 ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr); 3621 3622 /* receive message of values*/ 3623 vals = buf; 3624 mycols = ibuf; 3625 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3626 3627 /* insert into matrix */ 3628 jj = rstart*bs; 3629 for (i=0; i<m; i++) { 3630 ierr = 
MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3631 mycols += locrowlens[i]; 3632 vals += locrowlens[i]; 3633 jj++; 3634 } 3635 } 3636 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3637 ierr = PetscFree(buf);CHKERRQ(ierr); 3638 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3639 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3640 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3641 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3642 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3643 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3644 PetscFunctionReturn(0); 3645 } 3646 3647 #undef __FUNCT__ 3648 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3649 /*@ 3650 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3651 3652 Input Parameters: 3653 + mat - the matrix 3654 - fact - the factor used to compute the hash table size 3655 3656 Not Collective, each process can use a different factor 3657 3658 Level: advanced 3659 3660 Notes: 3661 This can also be set by the command line option: -mat_use_hash_table <fact> 3662 3663 .keywords: matrix, hashtable, factor, HT 3664 3665 .seealso: MatSetOption() 3666 @*/ 3667 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3668 { 3669 PetscErrorCode ierr; 3670 3671 PetscFunctionBegin; 3672 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3673 PetscFunctionReturn(0); 3674 } 3675 3676 #undef __FUNCT__ 3677 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3678 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3679 { 3680 Mat_MPIBAIJ *baij; 3681 3682 PetscFunctionBegin; 3683 baij = (Mat_MPIBAIJ*)mat->data; 3684 baij->ht_fact = fact; 3685 PetscFunctionReturn(0); 3686 } 3687 3688 #undef __FUNCT__ 3689 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3690 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3691 { 3692 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3693 3694 PetscFunctionBegin; 3695 if (Ad) *Ad = a->A; 3696 if (Ao) *Ao = a->B; 3697 if (colmap) *colmap = a->garray; 3698 PetscFunctionReturn(0); 3699 } 3700 3701 /* 3702 Special version for direct calls from Fortran (to eliminate two function call overheads) 3703 */ 3704 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3705 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3706 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3707 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3708 #endif 3709 3710 #undef __FUNCT__ 3711 #define __FUNCT__ "matmpibaijsetvaluesblocked" 3712 /*@C 3713 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3714 3715 Collective on Mat 3716 3717 Input Parameters: 3718 + mat - the matrix 3719 . min - number of input rows 3720 . im - input rows 3721 . nin - number of input columns 3722 . in - input columns 3723 . v - numerical values input 3724 - addvin - INSERT_VALUES or ADD_VALUES 3725 3726 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3727 3728 Level: advanced 3729 3730 .seealso: MatSetValuesBlocked() 3731 @*/ 3732 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3733 { 3734 /* convert input arguments to C version */ 3735 Mat mat = *matin; 3736 PetscInt m = *min, n = *nin; 3737 InsertMode addv = *addvin; 3738 3739 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3740 const MatScalar *value; 3741 MatScalar *barray = baij->barray; 3742 PetscBool roworiented = baij->roworiented; 3743 PetscErrorCode ierr; 3744 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3745 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3746 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3747 3748 PetscFunctionBegin; 3749 /* tasks normally handled by MatSetValuesBlocked() */ 3750 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3751 #if defined(PETSC_USE_DEBUG) 3752 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3753 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3754 #endif 3755 if (mat->assembled) { 3756 mat->was_assembled = PETSC_TRUE; 3757 mat->assembled = PETSC_FALSE; 3758 } 3759 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3760 3761 3762 if (!barray) { 3763 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3764 baij->barray = barray; 3765 } 3766 3767 if (roworiented) stepval = (n-1)*bs; 3768 else stepval = (m-1)*bs; 3769 3770 for (i=0; i<m; i++) { 3771 if (im[i] < 0) continue; 3772 #if defined(PETSC_USE_DEBUG) 3773 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3774 #endif 3775 if (im[i] >= rstart && im[i] < rend) { 3776 row = im[i] - rstart; 3777 for (j=0; j<n; j++) { 3778 /* If NumCol = 1 then a copy is not required */ 3779 if ((roworiented) && (n == 1)) { 3780 barray = (MatScalar*)v + i*bs2; 3781 } else if ((!roworiented) && (m == 1)) { 3782 barray = (MatScalar*)v + j*bs2; 3783 } else { /* Here a copy is required */ 3784 if (roworiented) { 3785 value = v + i*(stepval+bs)*bs + j*bs; 3786 } else { 3787 value = v + j*(stepval+bs)*bs + i*bs; 3788 } 3789 for (ii=0; ii<bs; ii++,value+=stepval) { 3790 for (jj=0; jj<bs; jj++) { 3791 *barray++ = *value++; 3792 } 3793 } 3794 barray -=bs2; 3795 } 3796 3797 if (in[j] >= cstart && in[j] < cend) { 3798 col = in[j] - cstart; 3799 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3800 } else if (in[j] < 0) continue; 3801 #if defined(PETSC_USE_DEBUG) 3802 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3803 #endif 3804 else { 3805 if (mat->was_assembled) { 3806 if (!baij->colmap) { 3807 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3808 } 3809 3810 #if defined(PETSC_USE_DEBUG) 3811 #if defined(PETSC_USE_CTABLE) 3812 { PetscInt data; 3813 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3814 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3815 } 3816 #else 3817 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3818 #endif 3819 #endif 3820 #if defined(PETSC_USE_CTABLE) 3821 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3822 col = (col - 1)/bs; 3823 #else 3824 col = (baij->colmap[in[j]] - 1)/bs; 3825 
#endif 3826 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3827 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3828 col = in[j]; 3829 } 3830 } else col = in[j]; 3831 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3832 } 3833 } 3834 } else { 3835 if (!baij->donotstash) { 3836 if (roworiented) { 3837 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3838 } else { 3839 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3840 } 3841 } 3842 } 3843 } 3844 3845 /* task normally handled by MatSetValuesBlocked() */ 3846 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3847 PetscFunctionReturn(0); 3848 } 3849 3850 #undef __FUNCT__ 3851 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 3852 /*@ 3853 MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows in standard 3854 CSR format. 3855 3856 Collective on MPI_Comm 3857 3858 Input Parameters: 3859 + comm - MPI communicator 3860 . bs - the block size, only a block size of 1 is supported 3861 . m - number of local rows (Cannot be PETSC_DECIDE) 3862 . n - This value should be the same as the local size used in creating the 3863 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 3864 calculated if N is given). For square matrices n is almost always m. 3865 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3866 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3867 . i - row indices 3868 . j - column indices 3869 - a - matrix values 3870 3871 Output Parameter: 3872 . mat - the matrix 3873 3874 Level: intermediate 3875 3876 Notes: 3877 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3878 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3879 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3880 3881 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3882 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3883 block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory 3884 with column-major ordering within blocks. 3885 3886 The i and j indices are 0 based, and the entries of i are offsets into the local j array.
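   Example of usage (a minimal sketch, not taken from the PETSc examples; the CSR arrays below
   describe a hypothetical 2 by 2 local matrix with entries (0,0)=1, (0,1)=2 and (1,1)=3 and are
   purely illustrative):
.vb
      Mat            A;
      PetscErrorCode ierr;
      PetscInt       ia[] = {0,2,3};
      PetscInt       ja[] = {0,1,1};
      PetscScalar    va[] = {1.0,2.0,3.0};
      ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ia,ja,va,&A);CHKERRQ(ierr);
.ve
   The block size is 1 (the only supported value), each process passes the CSR description of its
   own local rows, and the arrays are copied so they may be freed or reused after the call.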
3887 3888 .keywords: matrix, block, aij, compressed row, sparse, parallel 3889 3890 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(), 3891 MATMPIBAIJ, MatCreateBAIJ(), MatCreateMPIAIJWithSplitArrays() 3892 @*/ 3893 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3894 { 3895 PetscErrorCode ierr; 3896 3897 PetscFunctionBegin; 3898 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3899 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3900 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3901 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3902 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 3903 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3904 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3905 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3906 PetscFunctionReturn(0); 3907 } 3908