#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash-table cost; without it, it is not scalable (each processor
   has an order N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
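/*
   Illustration (added note, not part of the original source): if the off-diagonal block B
   on a process holds global columns {3, 7, 42}, then garray[] = {3, 7, 42} and the colmap
   built above answers the reverse question "which local column of B holds global column
   gcol?" (gcol is a stand-in name for the global column index being looked up).  Both
   storage variants are queried the same way later in this file, and a nonpositive result
   after the -1 shift means the global column is not present in B on this process:

     PetscInt col;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif

   The +1/-1 shifts exist because both the hash table and the zeroed array use 0 as the
   "empty" marker.
*/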

#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value;   \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    }  \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
    N = nrow1++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value;  \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
    N = nrow2++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->m;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

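/*
   Usage sketch (illustrative only, not part of the original source): callers do not invoke
   MatSetValues_MPIAIJ() directly; they go through MatSetValues() on a MATMPIAIJ matrix with
   global indices.  Locally owned rows are routed above into the diagonal (aij->A) or
   off-diagonal (aij->B) block; rows owned by another process are stashed and exchanged
   during assembly.

     PetscInt    row,col;          set to the desired global row/column indices
     PetscScalar val = 1.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/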
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

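/*
   Note (added for readability, not in the original source): the MPI_Allreduce with MPI_BOR
   above works because the insert modes behave like flags for this purpose.  If every
   process passed the same mode the OR simply reproduces that mode, while a mix of
   INSERT_VALUES and ADD_VALUES ORs to (ADD_VALUES|INSERT_VALUES), which is exactly the
   value the error check looks for.  Processes that set no values at all still pick up the
   agreed mode from the reduction result.
*/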
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data,*b = (Mat_SeqAIJ *)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* reaccess the b because aij->B was changed in MatSetValues() or DisAssemble() */
  b = (Mat_SeqAIJ *)aij->B->data;

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

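/*
   What the routine above computes (note added for readability, not in the original
   source): with the usual MPIAIJ splitting y_local = A*x_local + B*x_ghost, the scatter
   that gathers the ghost entries of x into a->lvec is started first, the purely local
   product with the diagonal block A is performed while those messages are in flight, and
   only then is the scatter completed and the off-diagonal contribution B*lvec added in.
   MatMultAdd and MatMultTranspose below follow the same communication/computation
   overlap, with the scatter direction reversed for the transpose.
*/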
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me, Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

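/*
   File layout written by MatView_MPIAIJ_Binary() below (summary added for readability,
   not in the original source): a 4-entry integer header (MAT_FILE_COOKIE, global rows M,
   global columns N, total number of nonzeros), then the per-row nonzero counts for all
   global rows, then all global column indices, then all values.  Process 0 receives each
   other process's piece in turn and writes everything through the single binary file
   descriptor obtained from the viewer.
*/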
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

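/*
   Usage sketch (illustrative, not part of the original source): the binary path above is
   reached through the generic viewer interface, e.g. with a writable binary viewer
   obtained from PetscViewerBinaryOpen() on the matrix communicator:

     ierr = MatView(mat,viewer);CHKERRQ(ierr);

   The dispatcher below routes to MatView_MPIAIJ_Binary() when the viewer type is
   PETSC_VIEWER_BINARY and more than one process is involved; on a single process the
   sequential diagonal block is viewed directly.
*/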
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

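/*
   Note on the relaxation routine below (added, not in the original source): only the
   "local" SOR variants are supported.  Each outer iteration scatters the current solution
   so that the ghost values of x are available in mat->lvec, forms the modified right-hand
   side bb1 = bb - B*x_ghost (the lvec is negated and fed to the off-diagonal MatMultAdd),
   and then runs the requested forward/backward/symmetric sweep of the sequential SOR on
   the diagonal block A against that right-hand side.  A true parallel SOR across process
   boundaries is rejected with PETSC_ERR_SUP.
*/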
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

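/*
   Note on MatGetRow below (added, not in the original source): a local row is returned
   with globally increasing column indices by splicing three already-sorted pieces: the
   B (off-diagonal) entries whose global column (cmap[]) lies below cstart, then all
   A (diagonal-block) entries shifted by cstart, then the remaining B entries whose global
   columns lie at or beyond cend.  The work arrays rowvalues/rowindices are sized once for
   the longest local row and reused until MatRestoreRow() is called.
*/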
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

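/*
   Note on the norms below (added, not in the original source): NORM_FROBENIUS sums the
   squares of the local A and B entries and combines them across processes with MPI_SUM
   before taking the square root; NORM_1 accumulates per-global-column absolute sums into
   a length-N array, MPI_SUMs it, and takes the maximum entry; NORM_INFINITY takes the
   local maximum row sum (each local row spans both A and B) and combines with MPI_MAX.
   The 2-norm is not supported.
*/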
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
MatAssemblyBegin_MPIAIJ, 1612 MatAssemblyEnd_MPIAIJ, 1613 0, 1614 MatSetOption_MPIAIJ, 1615 MatZeroEntries_MPIAIJ, 1616 /*25*/ MatZeroRows_MPIAIJ, 1617 0, 1618 0, 1619 0, 1620 0, 1621 /*30*/ MatSetUpPreallocation_MPIAIJ, 1622 0, 1623 0, 1624 0, 1625 0, 1626 /*35*/ MatDuplicate_MPIAIJ, 1627 0, 1628 0, 1629 0, 1630 0, 1631 /*40*/ MatAXPY_MPIAIJ, 1632 MatGetSubMatrices_MPIAIJ, 1633 MatIncreaseOverlap_MPIAIJ, 1634 MatGetValues_MPIAIJ, 1635 MatCopy_MPIAIJ, 1636 /*45*/ MatPrintHelp_MPIAIJ, 1637 MatScale_MPIAIJ, 1638 0, 1639 0, 1640 0, 1641 /*50*/ MatSetBlockSize_MPIAIJ, 1642 0, 1643 0, 1644 0, 1645 0, 1646 /*55*/ MatFDColoringCreate_MPIAIJ, 1647 0, 1648 MatSetUnfactored_MPIAIJ, 1649 0, 1650 0, 1651 /*60*/ MatGetSubMatrix_MPIAIJ, 1652 MatDestroy_MPIAIJ, 1653 MatView_MPIAIJ, 1654 MatGetPetscMaps_Petsc, 1655 0, 1656 /*65*/ 0, 1657 0, 1658 0, 1659 0, 1660 0, 1661 /*70*/ 0, 1662 0, 1663 MatSetColoring_MPIAIJ, 1664 #if defined(PETSC_HAVE_ADIC) 1665 MatSetValuesAdic_MPIAIJ, 1666 #else 1667 0, 1668 #endif 1669 MatSetValuesAdifor_MPIAIJ, 1670 /*75*/ 0, 1671 0, 1672 0, 1673 0, 1674 0, 1675 /*80*/ 0, 1676 0, 1677 0, 1678 0, 1679 /*84*/ MatLoad_MPIAIJ, 1680 0, 1681 0, 1682 0, 1683 0, 1684 0, 1685 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1686 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1687 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1688 MatPtAP_Basic, 1689 MatPtAPSymbolic_MPIAIJ, 1690 /*95*/ MatPtAPNumeric_MPIAIJ, 1691 0, 1692 0, 1693 0, 1694 0, 1695 /*100*/0, 1696 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1697 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1698 MatConjugate_MPIAIJ 1699 }; 1700 1701 /* ----------------------------------------------------------------------------------------*/ 1702 1703 EXTERN_C_BEGIN 1704 #undef __FUNCT__ 1705 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1706 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1707 { 1708 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1709 PetscErrorCode ierr; 1710 1711 PetscFunctionBegin; 1712 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1713 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1714 PetscFunctionReturn(0); 1715 } 1716 EXTERN_C_END 1717 1718 EXTERN_C_BEGIN 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1721 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1722 { 1723 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1724 PetscErrorCode ierr; 1725 1726 PetscFunctionBegin; 1727 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1728 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1729 PetscFunctionReturn(0); 1730 } 1731 EXTERN_C_END 1732 1733 #include "petscpc.h" 1734 EXTERN_C_BEGIN 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1737 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1738 { 1739 Mat_MPIAIJ *b; 1740 PetscErrorCode ierr; 1741 PetscInt i; 1742 1743 PetscFunctionBegin; 1744 B->preallocated = PETSC_TRUE; 1745 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1746 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1747 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1748 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1749 if (d_nnz) { 1750 for (i=0; i<B->m; i++) { 1751 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1752 } 1753 } 1754 if (o_nnz) { 1755 for (i=0; i<B->m; i++) { 1756 if (o_nnz[i] < 0) 
SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1757 } 1758 } 1759 b = (Mat_MPIAIJ*)B->data; 1760 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1761 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1762 1763 PetscFunctionReturn(0); 1764 } 1765 EXTERN_C_END 1766 1767 #undef __FUNCT__ 1768 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1769 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1770 { 1771 Mat mat; 1772 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1773 PetscErrorCode ierr; 1774 1775 PetscFunctionBegin; 1776 *newmat = 0; 1777 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1778 ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr); 1779 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1780 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1781 a = (Mat_MPIAIJ*)mat->data; 1782 1783 mat->factor = matin->factor; 1784 mat->bs = matin->bs; 1785 mat->assembled = PETSC_TRUE; 1786 mat->insertmode = NOT_SET_VALUES; 1787 mat->preallocated = PETSC_TRUE; 1788 1789 a->rstart = oldmat->rstart; 1790 a->rend = oldmat->rend; 1791 a->cstart = oldmat->cstart; 1792 a->cend = oldmat->cend; 1793 a->size = oldmat->size; 1794 a->rank = oldmat->rank; 1795 a->donotstash = oldmat->donotstash; 1796 a->roworiented = oldmat->roworiented; 1797 a->rowindices = 0; 1798 a->rowvalues = 0; 1799 a->getrowactive = PETSC_FALSE; 1800 1801 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1802 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1803 if (oldmat->colmap) { 1804 #if defined (PETSC_USE_CTABLE) 1805 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1806 #else 1807 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1808 ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1809 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1810 #endif 1811 } else a->colmap = 0; 1812 if (oldmat->garray) { 1813 PetscInt len; 1814 len = oldmat->B->n; 1815 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1816 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1817 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1818 } else a->garray = 0; 1819 1820 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1821 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1822 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1823 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1824 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1825 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1826 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1827 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1828 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1829 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1830 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1831 *newmat = mat; 1832 PetscFunctionReturn(0); 1833 } 1834 1835 #include "petscsys.h" 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "MatLoad_MPIAIJ" 1839 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1840 { 1841 Mat A; 1842 PetscScalar *vals,*svals; 1843 MPI_Comm comm = ((PetscObject)viewer)->comm; 1844 MPI_Status status; 1845 PetscErrorCode ierr; 1846 PetscMPIInt rank,size,tag = 
((PetscObject)viewer)->tag,maxnz; 1847 PetscInt i,nz,j,rstart,rend,mmax; 1848 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1849 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1850 PetscInt cend,cstart,n,*rowners; 1851 int fd; 1852 1853 PetscFunctionBegin; 1854 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1855 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1856 if (!rank) { 1857 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1858 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1859 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1860 } 1861 1862 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1863 M = header[1]; N = header[2]; 1864 /* determine ownership of all rows */ 1865 m = M/size + ((M % size) > rank); 1866 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1867 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1868 1869 /* First process needs enough room for process with most rows */ 1870 if (!rank) { 1871 mmax = rowners[1]; 1872 for (i=2; i<size; i++) { 1873 mmax = PetscMax(mmax,rowners[i]); 1874 } 1875 } else mmax = m; 1876 1877 rowners[0] = 0; 1878 for (i=2; i<=size; i++) { 1879 mmax = PetscMax(mmax,rowners[i]); 1880 rowners[i] += rowners[i-1]; 1881 } 1882 rstart = rowners[rank]; 1883 rend = rowners[rank+1]; 1884 1885 /* distribute row lengths to all processors */ 1886 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 1887 if (!rank) { 1888 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1889 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1890 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1891 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1892 for (j=0; j<m; j++) { 1893 procsnz[0] += ourlens[j]; 1894 } 1895 for (i=1; i<size; i++) { 1896 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1897 /* calculate the number of nonzeros on each processor */ 1898 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1899 procsnz[i] += rowlengths[j]; 1900 } 1901 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1902 } 1903 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1904 } else { 1905 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1906 } 1907 1908 if (!rank) { 1909 /* determine max buffer needed and allocate it */ 1910 maxnz = 0; 1911 for (i=0; i<size; i++) { 1912 maxnz = PetscMax(maxnz,procsnz[i]); 1913 } 1914 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1915 1916 /* read in my part of the matrix column indices */ 1917 nz = procsnz[0]; 1918 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1919 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1920 1921 /* read in every one elses and ship off */ 1922 for (i=1; i<size; i++) { 1923 nz = procsnz[i]; 1924 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1925 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1926 } 1927 ierr = PetscFree(cols);CHKERRQ(ierr); 1928 } else { 1929 /* determine buffer space needed for message */ 1930 nz = 0; 1931 for (i=0; i<m; i++) { 1932 nz += ourlens[i]; 1933 } 1934 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1935 1936 /* receive message of column indices*/ 1937 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1938 ierr = 
MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1939 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1940 } 1941 1942 /* determine column ownership if matrix is not square */ 1943 if (N != M) { 1944 n = N/size + ((N % size) > rank); 1945 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1946 cstart = cend - n; 1947 } else { 1948 cstart = rstart; 1949 cend = rend; 1950 n = cend - cstart; 1951 } 1952 1953 /* loop over local rows, determining number of off diagonal entries */ 1954 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1955 jj = 0; 1956 for (i=0; i<m; i++) { 1957 for (j=0; j<ourlens[i]; j++) { 1958 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1959 jj++; 1960 } 1961 } 1962 1963 /* create our matrix */ 1964 for (i=0; i<m; i++) { 1965 ourlens[i] -= offlens[i]; 1966 } 1967 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 1968 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 1969 ierr = MatSetType(A,type);CHKERRQ(ierr); 1970 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 1971 1972 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1973 for (i=0; i<m; i++) { 1974 ourlens[i] += offlens[i]; 1975 } 1976 1977 if (!rank) { 1978 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1979 1980 /* read in my part of the matrix numerical values */ 1981 nz = procsnz[0]; 1982 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1983 1984 /* insert into matrix */ 1985 jj = rstart; 1986 smycols = mycols; 1987 svals = vals; 1988 for (i=0; i<m; i++) { 1989 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1990 smycols += ourlens[i]; 1991 svals += ourlens[i]; 1992 jj++; 1993 } 1994 1995 /* read in other processors and ship out */ 1996 for (i=1; i<size; i++) { 1997 nz = procsnz[i]; 1998 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1999 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2000 } 2001 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2002 } else { 2003 /* receive numeric values */ 2004 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2005 2006 /* receive message of values*/ 2007 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2008 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2009 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2010 2011 /* insert into matrix */ 2012 jj = rstart; 2013 smycols = mycols; 2014 svals = vals; 2015 for (i=0; i<m; i++) { 2016 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2017 smycols += ourlens[i]; 2018 svals += ourlens[i]; 2019 jj++; 2020 } 2021 } 2022 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2023 ierr = PetscFree(vals);CHKERRQ(ierr); 2024 ierr = PetscFree(mycols);CHKERRQ(ierr); 2025 ierr = PetscFree(rowners);CHKERRQ(ierr); 2026 2027 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2028 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2029 *newmat = A; 2030 PetscFunctionReturn(0); 2031 } 2032 2033 #undef __FUNCT__ 2034 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2035 /* 2036 Not great since it makes two copies of the submatrix, first an SeqAIJ 2037 in local and then by concatenating the local matrices the end result. 
2038 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2039 */ 2040 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2041 { 2042 PetscErrorCode ierr; 2043 PetscMPIInt rank,size; 2044 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2045 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2046 Mat *local,M,Mreuse; 2047 PetscScalar *vwork,*aa; 2048 MPI_Comm comm = mat->comm; 2049 Mat_SeqAIJ *aij; 2050 2051 2052 PetscFunctionBegin; 2053 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2054 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2055 2056 if (call == MAT_REUSE_MATRIX) { 2057 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2058 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2059 local = &Mreuse; 2060 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2061 } else { 2062 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2063 Mreuse = *local; 2064 ierr = PetscFree(local);CHKERRQ(ierr); 2065 } 2066 2067 /* 2068 m - number of local rows 2069 n - number of columns (same on all processors) 2070 rstart - first row in new global matrix generated 2071 */ 2072 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2073 if (call == MAT_INITIAL_MATRIX) { 2074 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2075 ii = aij->i; 2076 jj = aij->j; 2077 2078 /* 2079 Determine the number of non-zeros in the diagonal and off-diagonal 2080 portions of the matrix in order to do correct preallocation 2081 */ 2082 2083 /* first get start and end of "diagonal" columns */ 2084 if (csize == PETSC_DECIDE) { 2085 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2086 if (mglobal == n) { /* square matrix */ 2087 nlocal = m; 2088 } else { 2089 nlocal = n/size + ((n % size) > rank); 2090 } 2091 } else { 2092 nlocal = csize; 2093 } 2094 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2095 rstart = rend - nlocal; 2096 if (rank == size - 1 && rend != n) { 2097 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2098 } 2099 2100 /* next, compute all the lengths */ 2101 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2102 olens = dlens + m; 2103 for (i=0; i<m; i++) { 2104 jend = ii[i+1] - ii[i]; 2105 olen = 0; 2106 dlen = 0; 2107 for (j=0; j<jend; j++) { 2108 if (*jj < rstart || *jj >= rend) olen++; 2109 else dlen++; 2110 jj++; 2111 } 2112 olens[i] = olen; 2113 dlens[i] = dlen; 2114 } 2115 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2116 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2117 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2118 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2119 ierr = PetscFree(dlens);CHKERRQ(ierr); 2120 } else { 2121 PetscInt ml,nl; 2122 2123 M = *newmat; 2124 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2125 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2126 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2127 /* 2128 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2129 rather than the slower MatSetValues(). 
2130 */ 2131 M->was_assembled = PETSC_TRUE; 2132 M->assembled = PETSC_FALSE; 2133 } 2134 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2135 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2136 ii = aij->i; 2137 jj = aij->j; 2138 aa = aij->a; 2139 for (i=0; i<m; i++) { 2140 row = rstart + i; 2141 nz = ii[i+1] - ii[i]; 2142 cwork = jj; jj += nz; 2143 vwork = aa; aa += nz; 2144 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2145 } 2146 2147 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2148 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2149 *newmat = M; 2150 2151 /* save submatrix used in processor for next request */ 2152 if (call == MAT_INITIAL_MATRIX) { 2153 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2154 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2155 } 2156 2157 PetscFunctionReturn(0); 2158 } 2159 2160 EXTERN_C_BEGIN 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2163 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2164 { 2165 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2166 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2167 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2168 const PetscInt *JJ; 2169 PetscScalar *values; 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 #if defined(PETSC_OPT_g) 2174 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2175 #endif 2176 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2177 o_nnz = d_nnz + m; 2178 2179 for (i=0; i<m; i++) { 2180 nnz = I[i+1]- I[i]; 2181 JJ = J + I[i]; 2182 nnz_max = PetscMax(nnz_max,nnz); 2183 #if defined(PETSC_OPT_g) 2184 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2185 #endif 2186 for (j=0; j<nnz; j++) { 2187 if (*JJ >= cstart) break; 2188 JJ++; 2189 } 2190 d = 0; 2191 for (; j<nnz; j++) { 2192 if (*JJ++ >= cend) break; 2193 d++; 2194 } 2195 d_nnz[i] = d; 2196 o_nnz[i] = nnz - d; 2197 } 2198 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2199 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2200 2201 if (v) values = (PetscScalar*)v; 2202 else { 2203 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2204 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2205 } 2206 2207 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2208 for (i=0; i<m; i++) { 2209 ii = i + rstart; 2210 nnz = I[i+1]- I[i]; 2211 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2212 } 2213 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2214 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2215 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2216 2217 if (!v) { 2218 ierr = PetscFree(values);CHKERRQ(ierr); 2219 } 2220 PetscFunctionReturn(0); 2221 } 2222 EXTERN_C_END 2223 2224 #undef __FUNCT__ 2225 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2226 /*@C 2227 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2228 (the default parallel PETSc format). 2229 2230 Collective on MPI_Comm 2231 2232 Input Parameters: 2233 + A - the matrix 2234 . i - the indices into j for the start of each local row (starts with zero) 2235 . 
j - the column indices for each local row (starts with zero); these must be sorted within each row
2236 -  v - optional values in the matrix
2237 
2238    Level: developer
2239 
2240 .keywords: matrix, aij, compressed row, sparse, parallel
2241 
2242 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2243 @*/
2244 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2245 {
2246   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2247 
2248   PetscFunctionBegin;
2249   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2250   if (f) {
2251     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2252   }
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2258 /*@C
2259    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2260    (the default parallel PETSc format).  For good matrix assembly performance
2261    the user should preallocate the matrix storage by setting the parameters
2262    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2263    performance can be increased by more than a factor of 50.
2264 
2265    Collective on MPI_Comm
2266 
2267    Input Parameters:
2268 +  A - the matrix
2269 .  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2270           (same value is used for all local rows)
2271 .  d_nnz - array containing the number of nonzeros in the various rows of the
2272            DIAGONAL portion of the local submatrix (possibly different for each row)
2273            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2274            The size of this array is equal to the number of local rows, i.e., 'm'.
2275            You must leave room for the diagonal entry even if it is zero.
2276 .  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2277           submatrix (same value is used for all local rows).
2278 -  o_nnz - array containing the number of nonzeros in the various rows of the
2279            OFF-DIAGONAL portion of the local submatrix (possibly different for
2280            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2281            structure.  The size of this array is equal to the number
2282            of local rows, i.e., 'm'.
2283 
2284    If the *_nnz parameter is given then the *_nz parameter is ignored.
2285 
2286    The AIJ format (also called the Yale sparse matrix format or
2287    compressed row storage (CSR)) is fully compatible with standard Fortran 77
2288    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2289 
2290    The parallel matrix is partitioned such that the first m0 rows belong to
2291    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2292    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
2293 
2294    The DIAGONAL portion of the local submatrix of a processor can be defined
2295    as the submatrix which is obtained by extracting the part corresponding
2296    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2297    first row that belongs to the processor, and r2 is the last row belonging
2298    to this processor.  This is a square mxm matrix.  The remaining portion
2299    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2300 
2301    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2302 
2303    Example usage:
2304 
2305    Consider the following 8x8 matrix with 34 non-zero values, that is
2306    assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
2307    proc1 owns 3 rows, proc2 owns 2 rows.  This division can be shown
2308    as follows:
2309 
2310 .vb
2311             1  2  0  |  0  3  0  |  0  4
2312    Proc0    0  5  6  |  7  0  0  |  8  0
2313             9  0 10  | 11  0  0  | 12  0
2314    -------------------------------------
2315            13  0 14  | 15 16 17  |  0  0
2316    Proc1    0 18  0  | 19 20 21  |  0  0
2317             0  0  0  | 22 23  0  | 24  0
2318    -------------------------------------
2319    Proc2   25 26 27  |  0  0 28  | 29  0
2320            30  0  0  | 31 32 33  |  0 34
2321 .ve
2322 
2323    This can be represented as a collection of submatrices as:
2324 
2325 .vb
2326       A B C
2327       D E F
2328       G H I
2329 .ve
2330 
2331    Here the submatrices A,B,C are owned by proc0, D,E,F are
2332    owned by proc1, and G,H,I are owned by proc2.
2333 
2334    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2335    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2336    The 'M','N' parameters are 8,8, and have the same values on all procs.
2337 
2338    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2339    submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
2340    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2341    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2342    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
2343    matrix, and [DF] as another SeqAIJ matrix.
2344 
2345    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2346    allocated for every row of the local diagonal submatrix, and o_nz
2347    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2348    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2349    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2350    In this case, the values of d_nz,o_nz are:
2351 .vb
2352      proc0 : d_nz = 2, o_nz = 2
2353      proc1 : d_nz = 3, o_nz = 2
2354      proc2 : d_nz = 1, o_nz = 4
2355 .ve
2356    We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
2357    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2358    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2359    34 values.
2360 
2361    When the d_nnz, o_nnz parameters are specified, the storage is specified
2362    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2363    In the above case the values for d_nnz,o_nnz are:
2364 .vb
2365      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2366      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2367      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2368 .ve
2369    Here the space allocated is the sum of all the above values, i.e., 34, and
2370    hence the preallocation is perfect.
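
   Expressed as code, the calls that proc1 from the example above might make are
   sketched below (a minimal sketch: the communicator comm is assumed to be set up
   by the caller and error checking is omitted):

.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};      /* per-row counts on proc1 */

     MatCreate(comm,&A);
     MatSetSizes(A,3,3,PETSC_DETERMINE,PETSC_DETERMINE); /* m=3, n=3 on this process */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
     /* ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() follow as usual ... */
.ve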
2371 
2372    Level: intermediate
2373 
2374 .keywords: matrix, aij, compressed row, sparse, parallel
2375 
2376 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2377           MPIAIJ
2378 @*/
2379 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2380 {
2381   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2382 
2383   PetscFunctionBegin;
2384   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2385   if (f) {
2386     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2387   }
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatCreateMPIAIJ"
2393 /*@C
2394    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2395    (the default parallel PETSc format).  For good matrix assembly performance
2396    the user should preallocate the matrix storage by setting the parameters
2397    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2398    performance can be increased by more than a factor of 50.
2399 
2400    Collective on MPI_Comm
2401 
2402    Input Parameters:
2403 +  comm - MPI communicator
2404 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2405        This value should be the same as the local size used in creating the
2406        y vector for the matrix-vector product y = Ax.
2407 .  n - This value should be the same as the local size used in creating the
2408        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
2409        it calculated if N is given).  For square matrices n is almost always m.
2410 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2411 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2412 .  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2413           (same value is used for all local rows)
2414 .  d_nnz - array containing the number of nonzeros in the various rows of the
2415            DIAGONAL portion of the local submatrix (possibly different for each row)
2416            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2417            The size of this array is equal to the number of local rows, i.e., 'm'.
2418            You must leave room for the diagonal entry even if it is zero.
2419 .  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2420           submatrix (same value is used for all local rows).
2421 -  o_nnz - array containing the number of nonzeros in the various rows of the
2422            OFF-DIAGONAL portion of the local submatrix (possibly different for
2423            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2424            structure.  The size of this array is equal to the number
2425            of local rows, i.e., 'm'.
2426 
2427    Output Parameter:
2428 .  A - the matrix
2429 
2430    Notes:
2431    If the *_nnz parameter is given then the *_nz parameter is ignored.
2432 
2433    The m,n,M,N parameters specify the size of the matrix, and its partitioning across
2434    processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2435    storage requirements for this matrix.
2436 
2437    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2438    processor then it must be used on all processors that share the object for
2439    that argument.
2440 
2441    The user MUST specify either the local or global matrix dimensions
2442    (possibly both).
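
   For instance, the two calls below are both legal ways of creating a 100x100
   parallel matrix (a minimal sketch: comm, mlocal, nlocal, and the preallocation
   counts 5 and 2 are illustrative values assumed to be chosen by the caller, and
   the local sizes must add up to the global sizes across the communicator):

.vb
     Mat A;
     /* give the global size and let PETSc choose the local row/column split */
     MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,100,100,5,PETSC_NULL,2,PETSC_NULL,&A);

     /* or give the local sizes and let PETSc compute the global size */
     MatCreateMPIAIJ(comm,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve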
2443 
2444    The parallel matrix is partitioned such that the first m0 rows belong to
2445    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2446    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
2447 
2448    The DIAGONAL portion of the local submatrix of a processor can be defined
2449    as the submatrix which is obtained by extracting the part corresponding
2450    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2451    first row that belongs to the processor, and r2 is the last row belonging
2452    to this processor.  This is a square mxm matrix.  The remaining portion
2453    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2454 
2455    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2456 
2457    When calling this routine with a single process communicator, a matrix of
2458    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2459    type of communicator, use the construction mechanism:
2460      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2461 
2462    By default, this format uses inodes (identical nodes) when possible.
2463    We search for consecutive rows with the same nonzero structure, thereby
2464    reusing matrix information to achieve increased efficiency.
2465 
2466    Options Database Keys:
2467 +  -mat_no_inode - Do not use inodes
2468 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2469 -  -mat_aij_oneindex - Internally use indexing starting at 1
2470                         rather than 0.  Note that when calling MatSetValues(),
2471                         the user still MUST index entries starting at 0!
2472 
2473 
2474    Example usage:
2475 
2476    Consider the following 8x8 matrix with 34 non-zero values, that is
2477    assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
2478    proc1 owns 3 rows, proc2 owns 2 rows.  This division can be shown
2479    as follows:
2480 
2481 .vb
2482             1  2  0  |  0  3  0  |  0  4
2483    Proc0    0  5  6  |  7  0  0  |  8  0
2484             9  0 10  | 11  0  0  | 12  0
2485    -------------------------------------
2486            13  0 14  | 15 16 17  |  0  0
2487    Proc1    0 18  0  | 19 20 21  |  0  0
2488             0  0  0  | 22 23  0  | 24  0
2489    -------------------------------------
2490    Proc2   25 26 27  |  0  0 28  | 29  0
2491            30  0  0  | 31 32 33  |  0 34
2492 .ve
2493 
2494    This can be represented as a collection of submatrices as:
2495 
2496 .vb
2497       A B C
2498       D E F
2499       G H I
2500 .ve
2501 
2502    Here the submatrices A,B,C are owned by proc0, D,E,F are
2503    owned by proc1, and G,H,I are owned by proc2.
2504 
2505    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2506    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2507    The 'M','N' parameters are 8,8, and have the same values on all procs.
2508 
2509    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2510    submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
2511    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2512    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2513    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
2514    matrix, and [DF] as another SeqAIJ matrix.
2515 
2516    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2517    allocated for every row of the local diagonal submatrix, and o_nz
2518    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2519    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2520    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2521    In this case, the values of d_nz,o_nz are:
2522 .vb
2523      proc0 : d_nz = 2, o_nz = 2
2524      proc1 : d_nz = 3, o_nz = 2
2525      proc2 : d_nz = 1, o_nz = 4
2526 .ve
2527    We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
2528    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2529    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2530    34 values.
2531 
2532    When the d_nnz, o_nnz parameters are specified, the storage is specified
2533    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2534    In the above case the values for d_nnz,o_nnz are:
2535 .vb
2536      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2537      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2538      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2539 .ve
2540    Here the space allocated is the sum of all the above values, i.e., 34, and
2541    hence the preallocation is perfect.
2542 
2543    Level: intermediate
2544 
2545 .keywords: matrix, aij, compressed row, sparse, parallel
2546 
2547 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2548           MPIAIJ
2549 @*/
2550 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2551 {
2552   PetscErrorCode ierr;
2553   PetscMPIInt    size;
2554 
2555   PetscFunctionBegin;
2556   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2557   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2558   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2559   if (size > 1) {
2560     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2561     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2562   } else {
2563     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2564     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2565   }
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 #undef __FUNCT__
2570 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2571 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2572 {
2573   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2574 
2575   PetscFunctionBegin;
2576   *Ad = a->A;
2577   *Ao = a->B;
2578   *colmap = a->garray;
2579   PetscFunctionReturn(0);
2580 }
2581 
2582 #undef __FUNCT__
2583 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2584 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2585 {
2586   PetscErrorCode ierr;
2587   PetscInt       i;
2588   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2589 
2590   PetscFunctionBegin;
2591   if (coloring->ctype == IS_COLORING_LOCAL) {
2592     ISColoringValue *allcolors,*colors;
2593     ISColoring      ocoloring;
2594 
2595     /* set coloring for diagonal portion */
2596     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2597 
2598     /* set coloring for off-diagonal portion */
2599     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2600     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2601     for (i=0; i<a->B->n; i++) {
2602       colors[i] = allcolors[a->garray[i]];
2603     }
2604     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2605     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2606     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2607     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2608   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2609     ISColoringValue *colors;
2610     PetscInt        *larray;
2611     ISColoring      ocoloring;
2612 
2613     /* set coloring for diagonal portion */
2614     ierr =
PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2615 for (i=0; i<a->A->n; i++) { 2616 larray[i] = i + a->cstart; 2617 } 2618 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2619 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2620 for (i=0; i<a->A->n; i++) { 2621 colors[i] = coloring->colors[larray[i]]; 2622 } 2623 ierr = PetscFree(larray);CHKERRQ(ierr); 2624 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2625 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2626 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2627 2628 /* set coloring for off-diagonal portion */ 2629 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2630 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2631 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2632 for (i=0; i<a->B->n; i++) { 2633 colors[i] = coloring->colors[larray[i]]; 2634 } 2635 ierr = PetscFree(larray);CHKERRQ(ierr); 2636 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2637 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2638 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2639 } else { 2640 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2641 } 2642 2643 PetscFunctionReturn(0); 2644 } 2645 2646 #if defined(PETSC_HAVE_ADIC) 2647 #undef __FUNCT__ 2648 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2649 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2650 { 2651 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2652 PetscErrorCode ierr; 2653 2654 PetscFunctionBegin; 2655 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2656 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2657 PetscFunctionReturn(0); 2658 } 2659 #endif 2660 2661 #undef __FUNCT__ 2662 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2663 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2664 { 2665 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2666 PetscErrorCode ierr; 2667 2668 PetscFunctionBegin; 2669 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2670 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2671 PetscFunctionReturn(0); 2672 } 2673 2674 #undef __FUNCT__ 2675 #define __FUNCT__ "MatMerge" 2676 /*@C 2677 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2678 matrices from each processor 2679 2680 Collective on MPI_Comm 2681 2682 Input Parameters: 2683 + comm - the communicators the parallel matrix will live on 2684 . inmat - the input sequential matrices 2685 . n - number of local columns (or PETSC_DECIDE) 2686 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2687 2688 Output Parameter: 2689 . outmat - the parallel matrix generated 2690 2691 Level: advanced 2692 2693 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
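
   A minimal calling sketch (assuming each process has already assembled its own
   sequential matrix seqA, and letting PETSc choose the local column split):

.vb
     Mat seqA,par;          /* seqA: this process's sequential piece */
     /* ... build seqA with MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() ... */
     MatMerge(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);
     /* note that MatMerge() calls MatDestroy() on the input matrix */
.ve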
2694 2695 @*/ 2696 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2697 { 2698 PetscErrorCode ierr; 2699 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2700 PetscInt *indx; 2701 PetscScalar *values; 2702 PetscMap columnmap,rowmap; 2703 2704 PetscFunctionBegin; 2705 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2706 /* 2707 PetscMPIInt rank; 2708 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2709 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2710 */ 2711 if (scall == MAT_INITIAL_MATRIX){ 2712 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2713 if (n == PETSC_DECIDE){ 2714 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2715 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2716 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2717 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2718 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2719 } 2720 2721 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2722 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2723 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2724 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2725 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2726 2727 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2728 for (i=0;i<m;i++) { 2729 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2730 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2731 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2732 } 2733 /* This routine will ONLY return MPIAIJ type matrix */ 2734 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2735 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2736 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2737 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2738 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2739 2740 } else if (scall == MAT_REUSE_MATRIX){ 2741 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2742 } else { 2743 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2744 } 2745 2746 for (i=0;i<m;i++) { 2747 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2748 I = i + rstart; 2749 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2750 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2751 } 2752 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2753 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2754 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2755 2756 PetscFunctionReturn(0); 2757 } 2758 2759 #undef __FUNCT__ 2760 #define __FUNCT__ "MatFileSplit" 2761 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2762 { 2763 PetscErrorCode ierr; 2764 PetscMPIInt rank; 2765 PetscInt m,N,i,rstart,nnz; 2766 size_t len; 2767 const PetscInt *indx; 2768 PetscViewer out; 2769 char *name; 2770 Mat B; 2771 const PetscScalar *values; 2772 2773 PetscFunctionBegin; 2774 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2775 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2776 /* Should this be the type of the diagonal block of A? 
*/ 2777 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2778 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 2779 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2780 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2781 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2782 for (i=0;i<m;i++) { 2783 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2784 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2785 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2786 } 2787 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2788 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2789 2790 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2791 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2792 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2793 sprintf(name,"%s.%d",outfile,rank); 2794 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2795 ierr = PetscFree(name); 2796 ierr = MatView(B,out);CHKERRQ(ierr); 2797 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2798 ierr = MatDestroy(B);CHKERRQ(ierr); 2799 PetscFunctionReturn(0); 2800 } 2801 2802 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2803 #undef __FUNCT__ 2804 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2805 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2806 { 2807 PetscErrorCode ierr; 2808 Mat_Merge_SeqsToMPI *merge; 2809 PetscObjectContainer container; 2810 2811 PetscFunctionBegin; 2812 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2813 if (container) { 2814 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2815 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2816 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2817 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2818 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2819 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2820 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2821 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2822 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2823 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2824 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2825 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2826 2827 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2828 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2829 } 2830 ierr = PetscFree(merge);CHKERRQ(ierr); 2831 2832 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2833 PetscFunctionReturn(0); 2834 } 2835 2836 #include "src/mat/utils/freespace.h" 2837 #include "petscbt.h" 2838 static PetscEvent logkey_seqstompinum = 0; 2839 #undef __FUNCT__ 2840 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2841 /*@C 2842 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2843 matrices from each processor 2844 2845 Collective on MPI_Comm 2846 2847 Input Parameters: 2848 + comm - the communicators the parallel matrix will live on 2849 . seqmat - the input sequential matrices 2850 . m - number of local rows (or PETSC_DECIDE) 2851 . n - number of local columns (or PETSC_DECIDE) 2852 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2853 2854 Output Parameter: 2855 . mpimat - the parallel matrix generated 2856 2857 Level: advanced 2858 2859 Notes: 2860 The dimensions of the sequential matrix in each processor MUST be the same. 
2861 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2862 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2863 @*/ 2864 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2865 { 2866 PetscErrorCode ierr; 2867 MPI_Comm comm=mpimat->comm; 2868 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2869 PetscMPIInt size,rank,taga,*len_s; 2870 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2871 PetscInt proc,m; 2872 PetscInt **buf_ri,**buf_rj; 2873 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2874 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2875 MPI_Request *s_waits,*r_waits; 2876 MPI_Status *status; 2877 MatScalar *aa=a->a,**abuf_r,*ba_i; 2878 Mat_Merge_SeqsToMPI *merge; 2879 PetscObjectContainer container; 2880 2881 PetscFunctionBegin; 2882 if (!logkey_seqstompinum) { 2883 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2884 } 2885 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2886 2887 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2888 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2889 2890 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2891 if (container) { 2892 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2893 } 2894 bi = merge->bi; 2895 bj = merge->bj; 2896 buf_ri = merge->buf_ri; 2897 buf_rj = merge->buf_rj; 2898 2899 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2900 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2901 len_s = merge->len_s; 2902 2903 /* send and recv matrix values */ 2904 /*-----------------------------*/ 2905 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2906 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2907 2908 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2909 for (proc=0,k=0; proc<size; proc++){ 2910 if (!len_s[proc]) continue; 2911 i = owners[proc]; 2912 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2913 k++; 2914 } 2915 2916 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 2917 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 2918 ierr = PetscFree(status);CHKERRQ(ierr); 2919 2920 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2921 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2922 2923 /* insert mat values of mpimat */ 2924 /*----------------------------*/ 2925 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2926 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2927 nextrow = buf_ri_k + merge->nrecv; 2928 nextai = nextrow + merge->nrecv; 2929 2930 for (k=0; k<merge->nrecv; k++){ 2931 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2932 nrows = *(buf_ri_k[k]); 2933 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2934 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2935 } 2936 2937 /* set values of ba */ 2938 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2939 for (i=0; i<m; i++) { 2940 arow = owners[rank] + i; 2941 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2942 bnzi = bi[i+1] - bi[i]; 2943 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 
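    /* ba_i is a scratch buffer holding the values of row i of the merged matrix
       (bnzi entries, matching the column indices in bj_i).  The loops below first merge
       in the values from this process's seqmat and then those from each received message;
       the single forward scan assumes the column indices in both aj[] and bj_i[] are
       sorted in increasing order. */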
2944 2945 /* add local non-zero vals of this proc's seqmat into ba */ 2946 anzi = ai[arow+1] - ai[arow]; 2947 aj = a->j + ai[arow]; 2948 aa = a->a + ai[arow]; 2949 nextaj = 0; 2950 for (j=0; nextaj<anzi; j++){ 2951 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2952 ba_i[j] += aa[nextaj++]; 2953 } 2954 } 2955 2956 /* add received vals into ba */ 2957 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2958 /* i-th row */ 2959 if (i == *nextrow[k]) { 2960 anzi = *(nextai[k]+1) - *nextai[k]; 2961 aj = buf_rj[k] + *(nextai[k]); 2962 aa = abuf_r[k] + *(nextai[k]); 2963 nextaj = 0; 2964 for (j=0; nextaj<anzi; j++){ 2965 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2966 ba_i[j] += aa[nextaj++]; 2967 } 2968 } 2969 nextrow[k]++; nextai[k]++; 2970 } 2971 } 2972 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 2973 } 2974 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2975 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2976 2977 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 2978 ierr = PetscFree(ba_i);CHKERRQ(ierr); 2979 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 2980 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2981 PetscFunctionReturn(0); 2982 } 2983 2984 static PetscEvent logkey_seqstompisym = 0; 2985 #undef __FUNCT__ 2986 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 2987 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 2988 { 2989 PetscErrorCode ierr; 2990 Mat B_mpi; 2991 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2992 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 2993 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 2994 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 2995 PetscInt len,proc,*dnz,*onz; 2996 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 2997 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 2998 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 2999 MPI_Status *status; 3000 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3001 PetscBT lnkbt; 3002 Mat_Merge_SeqsToMPI *merge; 3003 PetscObjectContainer container; 3004 3005 PetscFunctionBegin; 3006 if (!logkey_seqstompisym) { 3007 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3008 } 3009 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3010 3011 /* make sure it is a PETSc comm */ 3012 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3013 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3014 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3015 3016 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3017 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3018 3019 /* determine row ownership */ 3020 /*---------------------------------------------------------*/ 3021 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3022 if (m == PETSC_DECIDE) { 3023 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3024 } else { 3025 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3026 } 3027 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3028 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3029 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3030 3031 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3032 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3033 3034 /* determine the 
number of messages to send, their lengths */ 3035 /*---------------------------------------------------------*/ 3036 len_s = merge->len_s; 3037 3038 len = 0; /* length of buf_si[] */ 3039 merge->nsend = 0; 3040 for (proc=0; proc<size; proc++){ 3041 len_si[proc] = 0; 3042 if (proc == rank){ 3043 len_s[proc] = 0; 3044 } else { 3045 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3046 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3047 } 3048 if (len_s[proc]) { 3049 merge->nsend++; 3050 nrows = 0; 3051 for (i=owners[proc]; i<owners[proc+1]; i++){ 3052 if (ai[i+1] > ai[i]) nrows++; 3053 } 3054 len_si[proc] = 2*(nrows+1); 3055 len += len_si[proc]; 3056 } 3057 } 3058 3059 /* determine the number and length of messages to receive for ij-structure */ 3060 /*-------------------------------------------------------------------------*/ 3061 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3062 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3063 3064 /* post the Irecv of j-structure */ 3065 /*-------------------------------*/ 3066 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3067 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3068 3069 /* post the Isend of j-structure */ 3070 /*--------------------------------*/ 3071 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3072 sj_waits = si_waits + merge->nsend; 3073 3074 for (proc=0, k=0; proc<size; proc++){ 3075 if (!len_s[proc]) continue; 3076 i = owners[proc]; 3077 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3078 k++; 3079 } 3080 3081 /* receives and sends of j-structure are complete */ 3082 /*------------------------------------------------*/ 3083 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3084 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3085 3086 /* send and recv i-structure */ 3087 /*---------------------------*/ 3088 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3089 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3090 3091 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3092 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3093 for (proc=0,k=0; proc<size; proc++){ 3094 if (!len_s[proc]) continue; 3095 /* form outgoing message for i-structure: 3096 buf_si[0]: nrows to be sent 3097 [1:nrows]: row index (global) 3098 [nrows+1:2*nrows+1]: i-structure index 3099 */ 3100 /*-------------------------------------------*/ 3101 nrows = len_si[proc]/2 - 1; 3102 buf_si_i = buf_si + nrows+1; 3103 buf_si[0] = nrows; 3104 buf_si_i[0] = 0; 3105 nrows = 0; 3106 for (i=owners[proc]; i<owners[proc+1]; i++){ 3107 anzi = ai[i+1] - ai[i]; 3108 if (anzi) { 3109 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3110 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3111 nrows++; 3112 } 3113 } 3114 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3115 k++; 3116 buf_si += len_si[proc]; 3117 } 3118 3119 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3120 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3121 3122 ierr = 
PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr); 3123 for (i=0; i<merge->nrecv; i++){ 3124 ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr); 3125 } 3126 3127 ierr = PetscFree(len_si);CHKERRQ(ierr); 3128 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3129 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3130 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3131 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3132 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3133 ierr = PetscFree(status);CHKERRQ(ierr); 3134 3135 /* compute a local seq matrix in each processor */ 3136 /*----------------------------------------------*/ 3137 /* allocate bi array and free space for accumulating nonzero column info */ 3138 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3139 bi[0] = 0; 3140 3141 /* create and initialize a linked list */ 3142 nlnk = N+1; 3143 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3144 3145 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3146 len = 0; 3147 len = ai[owners[rank+1]] - ai[owners[rank]]; 3148 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3149 current_space = free_space; 3150 3151 /* determine symbolic info for each local row */ 3152 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3153 nextrow = buf_ri_k + merge->nrecv; 3154 nextai = nextrow + merge->nrecv; 3155 for (k=0; k<merge->nrecv; k++){ 3156 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3157 nrows = *buf_ri_k[k]; 3158 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3159 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */ 3160 } 3161 3162 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3163 len = 0; 3164 for (i=0;i<m;i++) { 3165 bnzi = 0; 3166 /* add local non-zero cols of this proc's seqmat into lnk */ 3167 arow = owners[rank] + i; 3168 anzi = ai[arow+1] - ai[arow]; 3169 aj = a->j + ai[arow]; 3170 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3171 bnzi += nlnk; 3172 /* add received col data into lnk */ 3173 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3174 if (i == *nextrow[k]) { /* i-th row */ 3175 anzi = *(nextai[k]+1) - *nextai[k]; 3176 aj = buf_rj[k] + *nextai[k]; 3177 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3178 bnzi += nlnk; 3179 nextrow[k]++; nextai[k]++; 3180 } 3181 } 3182 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3183 3184 /* if free space is not available, make more free space */ 3185 if (current_space->local_remaining<bnzi) { 3186 ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr); 3187 nspacedouble++; 3188 } 3189 /* copy data into free space, then initialize lnk */ 3190 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3191 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3192 3193 current_space->array += bnzi; 3194 current_space->local_used += bnzi; 3195 current_space->local_remaining -= bnzi; 3196 3197 bi[i+1] = bi[i] + bnzi; 3198 } 3199 3200 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3201 3202 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3203 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3204 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3205 3206 /* create symbolic parallel
matrix B_mpi */ 3207 /*---------------------------------------*/ 3208 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3209 if (n==PETSC_DECIDE) { 3210 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3211 } else { 3212 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3213 } 3214 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3215 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3216 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3217 3218 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3219 B_mpi->assembled = PETSC_FALSE; 3220 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3221 merge->bi = bi; 3222 merge->bj = bj; 3223 merge->buf_ri = buf_ri; 3224 merge->buf_rj = buf_rj; 3225 merge->coi = PETSC_NULL; 3226 merge->coj = PETSC_NULL; 3227 merge->owners_co = PETSC_NULL; 3228 3229 /* attach the supporting struct to B_mpi for reuse */ 3230 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3231 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3232 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3233 *mpimat = B_mpi; 3234 3235 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3236 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3237 PetscFunctionReturn(0); 3238 } 3239 3240 static PetscEvent logkey_seqstompi = 0; 3241 #undef __FUNCT__ 3242 #define __FUNCT__ "MatMerge_SeqsToMPI" 3243 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3244 { 3245 PetscErrorCode ierr; 3246 3247 PetscFunctionBegin; 3248 if (!logkey_seqstompi) { 3249 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3250 } 3251 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3252 if (scall == MAT_INITIAL_MATRIX){ 3253 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3254 } 3255 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3256 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3257 PetscFunctionReturn(0); 3258 } 3259 static PetscEvent logkey_getlocalmat = 0; 3260 #undef __FUNCT__ 3261 #define __FUNCT__ "MatGetLocalMat" 3262 /*@C 3263 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3264 3265 Not Collective 3266 3267 Input Parameters: 3268 + A - the matrix 3269 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3270 3271 Output Parameter: 3272 . 
A_loc - the local sequential matrix generated 3273 3274 Level: developer 3275 3276 @*/ 3277 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3278 { 3279 PetscErrorCode ierr; 3280 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3281 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3282 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3283 PetscScalar *aa=a->a,*ba=b->a,*ca; 3284 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3285 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3286 3287 PetscFunctionBegin; 3288 if (!logkey_getlocalmat) { 3289 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3290 } 3291 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3292 if (scall == MAT_INITIAL_MATRIX){ 3293 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3294 ci[0] = 0; 3295 for (i=0; i<am; i++){ 3296 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3297 } 3298 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3299 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3300 k = 0; 3301 for (i=0; i<am; i++) { 3302 ncols_o = bi[i+1] - bi[i]; 3303 ncols_d = ai[i+1] - ai[i]; 3304 /* off-diagonal portion of A */ 3305 for (jo=0; jo<ncols_o; jo++) { 3306 col = cmap[*bj]; 3307 if (col >= cstart) break; 3308 cj[k] = col; bj++; 3309 ca[k++] = *ba++; 3310 } 3311 /* diagonal portion of A */ 3312 for (j=0; j<ncols_d; j++) { 3313 cj[k] = cstart + *aj++; 3314 ca[k++] = *aa++; 3315 } 3316 /* off-diagonal portion of A */ 3317 for (j=jo; j<ncols_o; j++) { 3318 cj[k] = cmap[*bj++]; 3319 ca[k++] = *ba++; 3320 } 3321 } 3322 /* put together the new matrix */ 3323 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3324 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3325 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3326 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3327 mat->freedata = PETSC_TRUE; 3328 mat->nonew = 0; 3329 } else if (scall == MAT_REUSE_MATRIX){ 3330 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3331 ci = mat->i; cj = mat->j; ca = mat->a; 3332 for (i=0; i<am; i++) { 3333 /* off-diagonal portion of A */ 3334 ncols_o = bi[i+1] - bi[i]; 3335 for (jo=0; jo<ncols_o; jo++) { 3336 col = cmap[*bj]; 3337 if (col >= cstart) break; 3338 *ca++ = *ba++; bj++; 3339 } 3340 /* diagonal portion of A */ 3341 ncols_d = ai[i+1] - ai[i]; 3342 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3343 /* off-diagonal portion of A */ 3344 for (j=jo; j<ncols_o; j++) { 3345 *ca++ = *ba++; bj++; 3346 } 3347 } 3348 } else { 3349 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3350 } 3351 3352 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3353 PetscFunctionReturn(0); 3354 } 3355 3356 static PetscEvent logkey_getlocalmatcondensed = 0; 3357 #undef __FUNCT__ 3358 #define __FUNCT__ "MatGetLocalMatCondensed" 3359 /*@C 3360 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3361 3362 Not Collective 3363 3364 Input Parameters: 3365 + A - the matrix 3366 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3367 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3368 3369 Output Parameter: 3370 . 
A_loc - the local sequential matrix generated 3371 3372 Level: developer 3373 3374 @*/ 3375 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3376 { 3377 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3378 PetscErrorCode ierr; 3379 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3380 IS isrowa,iscola; 3381 Mat *aloc; 3382 3383 PetscFunctionBegin; 3384 if (!logkey_getlocalmatcondensed) { 3385 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3386 } 3387 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3388 if (!row){ 3389 start = a->rstart; end = a->rend; 3390 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3391 } else { 3392 isrowa = *row; 3393 } 3394 if (!col){ 3395 start = a->cstart; 3396 cmap = a->garray; 3397 nzA = a->A->n; 3398 nzB = a->B->n; 3399 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3400 ncols = 0; 3401 for (i=0; i<nzB; i++) { 3402 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3403 else break; 3404 } 3405 imark = i; 3406 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3407 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3408 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3409 ierr = PetscFree(idx);CHKERRQ(ierr); 3410 } else { 3411 iscola = *col; 3412 } 3413 if (scall != MAT_INITIAL_MATRIX){ 3414 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3415 aloc[0] = *A_loc; 3416 } 3417 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3418 *A_loc = aloc[0]; 3419 ierr = PetscFree(aloc);CHKERRQ(ierr); 3420 if (!row){ 3421 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3422 } 3423 if (!col){ 3424 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3425 } 3426 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3427 PetscFunctionReturn(0); 3428 } 3429 3430 static PetscEvent logkey_GetBrowsOfAcols = 0; 3431 #undef __FUNCT__ 3432 #define __FUNCT__ "MatGetBrowsOfAcols" 3433 /*@C 3434 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 3435 3436 Collective on Mat 3437 3438 Input Parameters: 3439 + A,B - the matrices in mpiaij format 3440 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3441 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3442 3443 Output Parameter: 3444 + rowb, colb - index sets of rows and columns of B to extract 3445 .
brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3446 - B_seq - the sequential matrix generated 3447 3448 Level: developer 3449 3450 @*/ 3451 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3452 { 3453 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3454 PetscErrorCode ierr; 3455 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3456 IS isrowb,iscolb; 3457 Mat *bseq; 3458 3459 PetscFunctionBegin; 3460 if (a->cstart != b->rstart || a->cend != b->rend){ 3461 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3462 } 3463 if (!logkey_GetBrowsOfAcols) { 3464 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3465 } 3466 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3467 3468 if (scall == MAT_INITIAL_MATRIX){ 3469 start = a->cstart; 3470 cmap = a->garray; 3471 nzA = a->A->n; 3472 nzB = a->B->n; 3473 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3474 ncols = 0; 3475 for (i=0; i<nzB; i++) { /* row < local row index */ 3476 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3477 else break; 3478 } 3479 imark = i; 3480 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3481 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3482 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3483 ierr = PetscFree(idx);CHKERRQ(ierr); 3484 *brstart = imark; 3485 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3486 } else { 3487 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3488 isrowb = *rowb; iscolb = *colb; 3489 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3490 bseq[0] = *B_seq; 3491 } 3492 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3493 *B_seq = bseq[0]; 3494 ierr = PetscFree(bseq);CHKERRQ(ierr); 3495 if (!rowb){ 3496 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3497 } else { 3498 *rowb = isrowb; 3499 } 3500 if (!colb){ 3501 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3502 } else { 3503 *colb = iscolb; 3504 } 3505 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3506 PetscFunctionReturn(0); 3507 } 3508 3509 static PetscEvent logkey_GetBrowsOfAocols = 0; 3510 #undef __FUNCT__ 3511 #define __FUNCT__ "MatGetBrowsOfAoCols" 3512 /*@C 3513 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 3514 of the OFF-DIAGONAL portion of local A 3515 3516 Collective on Mat 3517 3518 Input Parameters: 3519 + A,B - the matrices in mpiaij format 3520 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3521 .
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3522 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3523 3524 Output Parameter: 3525 + B_oth - the sequential matrix generated 3526 3527 Level: developer 3528 3529 @*/ 3530 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3531 { 3532 VecScatter_MPI_General *gen_to,*gen_from; 3533 PetscErrorCode ierr; 3534 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3535 Mat_SeqAIJ *b_oth; 3536 VecScatter ctx=a->Mvctx; 3537 MPI_Comm comm=ctx->comm; 3538 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3539 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3540 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3541 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3542 MPI_Request *rwaits,*swaits; 3543 MPI_Status *sstatus,rstatus; 3544 PetscInt *cols; 3545 PetscScalar *vals; 3546 PetscMPIInt j; 3547 3548 PetscFunctionBegin; 3549 if (a->cstart != b->rstart || a->cend != b->rend){ 3550 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3551 } 3552 if (!logkey_GetBrowsOfAocols) { 3553 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3554 } 3555 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3556 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3557 3558 gen_to = (VecScatter_MPI_General*)ctx->todata; 3559 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3560 rvalues = gen_from->values; /* holds the length of sending row */ 3561 svalues = gen_to->values; /* holds the length of receiving row */ 3562 nrecvs = gen_from->n; 3563 nsends = gen_to->n; 3564 rwaits = gen_from->requests; 3565 swaits = gen_to->requests; 3566 srow = gen_to->indices; /* local row index to be sent */ 3567 rstarts = gen_from->starts; 3568 sstarts = gen_to->starts; 3569 rprocs = gen_from->procs; 3570 sprocs = gen_to->procs; 3571 sstatus = gen_to->sstatus; 3572 3573 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3574 if (scall == MAT_INITIAL_MATRIX){ 3575 /* i-array */ 3576 /*---------*/ 3577 /* post receives */ 3578 for (i=0; i<nrecvs; i++){ 3579 rowlen = (PetscInt*)rvalues + rstarts[i]; 3580 nrows = rstarts[i+1]-rstarts[i]; 3581 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3582 } 3583 3584 /* pack the outgoing message */ 3585 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3586 rstartsj = sstartsj + nsends +1; 3587 sstartsj[0] = 0; rstartsj[0] = 0; 3588 len = 0; /* total length of j or a array to be sent */ 3589 k = 0; 3590 for (i=0; i<nsends; i++){ 3591 rowlen = (PetscInt*)svalues + sstarts[i]; 3592 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3593 for (j=0; j<nrows; j++) { 3594 row = srow[k] + b->rowners[rank]; /* global row idx */ 3595 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3596 len += rowlen[j]; 3597 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3598 k++; 3599 } 3600 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3601 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3602 } 3603 /* recvs and sends of i-array are completed */ 3604 i = nrecvs; 3605 while (i--) { 3606 ierr = 
MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3607 } 3608 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3609 /* allocate buffers for sending j and a arrays */ 3610 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3611 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3612 3613 /* create i-array of B_oth */ 3614 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3615 b_othi[0] = 0; 3616 len = 0; /* total length of j or a array to be received */ 3617 k = 0; 3618 for (i=0; i<nrecvs; i++){ 3619 rowlen = (PetscInt*)rvalues + rstarts[i]; 3620 nrows = rstarts[i+1]-rstarts[i]; 3621 for (j=0; j<nrows; j++) { 3622 b_othi[k+1] = b_othi[k] + rowlen[j]; 3623 len += rowlen[j]; k++; 3624 } 3625 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3626 } 3627 3628 /* allocate space for j and a arrays of B_oth */ 3629 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3630 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3631 3632 /* j-array */ 3633 /*---------*/ 3634 /* post receives of j-array */ 3635 for (i=0; i<nrecvs; i++){ 3636 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3637 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3638 } 3639 k = 0; 3640 for (i=0; i<nsends; i++){ 3641 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3642 bufJ = bufj+sstartsj[i]; 3643 for (j=0; j<nrows; j++) { 3644 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3645 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3646 for (l=0; l<ncols; l++){ 3647 *bufJ++ = cols[l]; 3648 } 3649 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3650 } 3651 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3652 } 3653 3654 /* recvs and sends of j-array are completed */ 3655 i = nrecvs; 3656 while (i--) { 3657 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3658 } 3659 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3660 } else if (scall == MAT_REUSE_MATRIX){ 3661 sstartsj = *startsj; 3662 rstartsj = sstartsj + nsends +1; 3663 bufa = *bufa_ptr; 3664 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3665 b_otha = b_oth->a; 3666 } else { 3667 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 3668 } 3669 3670 /* a-array */ 3671 /*---------*/ 3672 /* post receives of a-array */ 3673 for (i=0; i<nrecvs; i++){ 3674 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3675 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3676 } 3677 k = 0; 3678 for (i=0; i<nsends; i++){ 3679 nrows = sstarts[i+1]-sstarts[i]; 3680 bufA = bufa+sstartsj[i]; 3681 for (j=0; j<nrows; j++) { 3682 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3683 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3684 for (l=0; l<ncols; l++){ 3685 *bufA++ = vals[l]; 3686 } 3687 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3688 3689 } 3690 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3691 } 3692 /* recvs and sends of a-array are completed */ 3693 i = nrecvs; 3694 while (i--) { 3695 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3696 } 3697 if (nsends) {ierr =
MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3698 3699 if (scall == MAT_INITIAL_MATRIX){ 3700 /* put together the new matrix */ 3701 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3702 3703 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3704 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3705 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3706 b_oth->freedata = PETSC_TRUE; 3707 b_oth->nonew = 0; 3708 3709 ierr = PetscFree(bufj);CHKERRQ(ierr); 3710 if (!startsj || !bufa_ptr){ 3711 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3712 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3713 } else { 3714 *startsj = sstartsj; 3715 *bufa_ptr = bufa; 3716 } 3717 } 3718 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3719 3720 PetscFunctionReturn(0); 3721 } 3722 3723 /*MC 3724 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3725 3726 Options Database Keys: 3727 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3728 3729 Level: beginner 3730 3731 .seealso: MatCreateMPIAIJ 3732 M*/ 3733 3734 EXTERN_C_BEGIN 3735 #undef __FUNCT__ 3736 #define __FUNCT__ "MatCreate_MPIAIJ" 3737 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 3738 { 3739 Mat_MPIAIJ *b; 3740 PetscErrorCode ierr; 3741 PetscInt i; 3742 PetscMPIInt size; 3743 3744 PetscFunctionBegin; 3745 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3746 3747 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3748 B->data = (void*)b; 3749 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3750 B->factor = 0; 3751 B->bs = 1; 3752 B->assembled = PETSC_FALSE; 3753 B->mapping = 0; 3754 3755 B->insertmode = NOT_SET_VALUES; 3756 b->size = size; 3757 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3758 3759 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3760 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3761 3762 /* the information in the maps duplicates the information computed below, eventually 3763 we should remove the duplicate information that is not contained in the maps */ 3764 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3765 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3766 3767 /* build local table of row and column ownerships */ 3768 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3769 ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3770 b->cowners = b->rowners + b->size + 2; 3771 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3772 b->rowners[0] = 0; 3773 for (i=2; i<=b->size; i++) { 3774 b->rowners[i] += b->rowners[i-1]; 3775 } 3776 b->rstart = b->rowners[b->rank]; 3777 b->rend = b->rowners[b->rank+1]; 3778 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3779 b->cowners[0] = 0; 3780 for (i=2; i<=b->size; i++) { 3781 b->cowners[i] += b->cowners[i-1]; 3782 } 3783 b->cstart = b->cowners[b->rank]; 3784 b->cend = b->cowners[b->rank+1]; 3785 3786 /* build cache for off array entries formed */ 3787 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3788 b->donotstash = PETSC_FALSE; 3789 b->colmap = 0; 3790 b->garray = 0; 3791 b->roworiented = PETSC_TRUE; 3792 3793 /* stuff used for matrix vector multiply */ 3794 b->lvec = PETSC_NULL; 3795 
b->Mvctx = PETSC_NULL; 3796 3797 /* stuff for MatGetRow() */ 3798 b->rowindices = 0; 3799 b->rowvalues = 0; 3800 b->getrowactive = PETSC_FALSE; 3801 3802 /* Explicitly create 2 MATSEQAIJ matrices. */ 3803 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3804 ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr); 3805 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3806 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3807 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3808 ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr); 3809 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3810 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3811 3812 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3813 "MatStoreValues_MPIAIJ", 3814 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3815 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3816 "MatRetrieveValues_MPIAIJ", 3817 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3818 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3819 "MatGetDiagonalBlock_MPIAIJ", 3820 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3821 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3822 "MatIsTranspose_MPIAIJ", 3823 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3824 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3825 "MatMPIAIJSetPreallocation_MPIAIJ", 3826 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3827 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3828 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3829 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3830 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3831 "MatDiagonalScaleLocal_MPIAIJ", 3832 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3833 PetscFunctionReturn(0); 3834 } 3835 EXTERN_C_END 3836
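/*
   Editor's illustrative sketch (not part of the original source): the routines above are the
   internals of the "mpiaij" format; a user drives them through the public API.  The hypothetical
   helper below shows the typical calling sequence -- MatCreate()/MatSetSizes()/MatSetType(),
   MatMPIAIJSetPreallocation(), MatSetValues() (which dispatches to MatSetValues_MPIAIJ() above),
   and MatAssemblyBegin()/MatAssemblyEnd().  The global size N and the tridiagonal stencil are
   arbitrary example choices, and the block is guarded by #if 0 so it does not affect compilation.
*/
#if 0
static PetscErrorCode ExampleAssembleMPIAIJ(MPI_Comm comm)
{
  Mat            A;
  PetscErrorCode ierr;
  PetscInt       i,rstart,rend,N = 100;
  PetscScalar    v;

  PetscFunctionBegin;
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  /* at most 3 nonzeros per row in the diagonal block, 1 in the off-diagonal block */
  ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    v = 2.0;
    ierr = MatSetValues(A,1,&i,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);
    if (i > 0)   {PetscInt col = i-1; v = -1.0; ierr = MatSetValues(A,1,&i,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);}
    if (i < N-1) {PetscInt col = i+1; v = -1.0; ierr = MatSetValues(A,1,&i,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);}
  }
  /* off-process entries (none in this example) would be stashed and exchanged during assembly */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif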