#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it it is not scalable (each
   processor has an order N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
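/*
   Added illustration (a sketch, not part of the original source): the map
   built above stores "local index + 1" so that a zero lookup result can
   mean "this global column has no entry in the off-diagonal block".  A
   hypothetical lookup helper mirroring how MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below consult the map:
*/
static PetscErrorCode ColmapLookup_Sketch(Mat mat,PetscInt gcol,PetscInt *lcol)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  {
    PetscErrorCode ierr;
    ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
    (*lcol)--;                     /* stored value is local index + 1 */
  }
#else
  *lcol = aij->colmap[gcol] - 1;   /* a zeroed entry yields -1: not in B */
#endif
  PetscFunctionReturn(0);
}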
#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
    N = nrow1++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (lastcol2 > col) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
    N = nrow2++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                 note ba and bj must be refreshed before rp2/ap2 are recomputed */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->m;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
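/*
   Added illustration (a sketch under the standard PETSc assembly idiom;
   the helper name is hypothetical): values for locally owned rows go
   directly into the diagonal (A) or off-diagonal (B) block above, while
   rows owned by other processes are stashed and shipped during assembly,
   so a caller simply does:
*/
static PetscErrorCode AssembleIdentity_Sketch(Mat mat)
{
  PetscErrorCode ierr;
  PetscInt       row,rstart,rend;
  PetscScalar    one = 1.0;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatSetValues(mat,1,&row,1,&row,&one,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}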
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
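/*
   Added note: the MPI_BOR reduction above exploits that INSERT_VALUES and
   ADD_VALUES occupy distinct nonzero bit patterns of the InsertMode enum,
   so OR-ing the modes of all ranks yields (ADD_VALUES|INSERT_VALUES)
   exactly when some ranks inserted while others added; ranks that set no
   values contribute NOT_SET_VALUES (zero) and leave the result unchanged.
*/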
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data,*b = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* re-access b because aij->B was changed in MatSetValues() or DisAssemble() */
  b = (Mat_SeqAIJ*)aij->B->data;

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart = l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
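  /*
     Added note: nprocs[] interleaves two arrays: nprocs[2*j] counts the
     rows this rank must ship to rank j, and nprocs[2*j+1] is a 0/1 flag
     marking whether anything goes to rank j at all, so nsends is just the
     number of destination ranks.  owner[i] records the destination rank
     of rows[i]; the inner loop restarts at j=0 only when the indices are
     not sorted (lastidx > idx).
  */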
  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, causing lots of mallocs and slowing
     things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
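/*
   Added note: with P denoting the scatter that gathers the needed ghost
   values of x into lvec, the two products above compute

        y = A_diag * x_local + B_offdiag * (P x)          (MatMult)
        z = y + A_diag * x_local + B_offdiag * (P x)      (MatMultAdd)

   The scatter is begun, the diagonal-block product is formed while the
   messages are in flight, the scatter is completed, and only then is the
   off-diagonal contribution added in.
*/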
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
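/*
   Added note: for the transpose products above the communication runs the
   other way: B^T x is computed into lvec first and then scattered back
   (SCATTER_REVERSE) with ADD_VALUES into the owning ranks, i.e.

        y = A_diag^T * x_local + P^T (B_offdiag^T * x_local)

   with the local product A_diag^T x overlapped with the reverse scatter
   whenever the scatter implementation permits it.
*/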
/*
   This only works correctly for square matrices where the subblock
   A->A is the diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* rank 0 needs as much space as the largest any processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
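/*
   Added note: the binary layout produced above is, in order,

      header  = [MAT_FILE_COOKIE, M, N, global nz]      (4 PetscInt)
      lengths = nonzeros per global row                 (M PetscInt)
      columns = global column indices, row by row       (nz PetscInt)
      values  = the nonzeros in the same order          (nz PetscScalar)

   Rank 0 performs all of the writes; every other rank sends its rows,
   indices, and values to rank 0 one processor at a time.
*/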
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
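/*
   Added note: each outer iteration above refreshes the ghost values and
   then relaxes only the diagonal block: with x_g = P x the gathered
   ghosts, the right-hand side becomes

        bb1 = bb - B_offdiag * x_g

   followed by a sequential SOR sweep on A_diag.  Across processes this is
   block Jacobi with SOR inside each block, which is why a true global
   parallel SOR is reported as unsupported.
*/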
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;
  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
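/*
   Added note: MatGetRow_MPIAIJ() above merges the two blocks back into
   globally sorted column order by exploiting that each block is already
   sorted: off-diagonal entries whose global column is < cstart come
   first, then every diagonal-block entry (shifted by cstart), then the
   remaining off-diagonal entries, whose global columns lie at or beyond
   cend.
*/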
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
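/*
   Added note: the reductions above implement

      Frobenius: ||A||_F   = sqrt( sum over ranks of sum_ij |a_ij|^2 )
      1-norm:    ||A||_1   = max_j sum_i |a_ij|   (local column sums are
                             summed across ranks, then the max is taken)
      inf-norm:  ||A||_inf = max_i sum_j |a_ij|   (row sums are entirely
                             local since rows are not split, so a single
                             MPI_MAX suffices)
*/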
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
       and then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one = 1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    x   = (Mat_SeqAIJ*)xx->A->data;
    y   = (Mat_SeqAIJ*)yy->A->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
    x   = (Mat_SeqAIJ*)xx->B->data;
    y   = (Mat_SeqAIJ*)yy->B->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ*)xx->B->data;
    y = (Mat_SeqAIJ*)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
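/*
   Added note: for SUBSET_NONZERO_PATTERN above, y->xtoy maps the i-th
   stored nonzero of X's off-diagonal block to the position of the same
   (row,column) entry in Y's block, so the update is the single pass
   y->a[y->xtoy[i]] += a * x->a[i].  The map is cached on Y and rebuilt
   only when it was built against a different X.
*/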
MatAssemblyBegin_MPIAIJ, 1620 MatAssemblyEnd_MPIAIJ, 1621 0, 1622 MatSetOption_MPIAIJ, 1623 MatZeroEntries_MPIAIJ, 1624 /*25*/ MatZeroRows_MPIAIJ, 1625 0, 1626 0, 1627 0, 1628 0, 1629 /*30*/ MatSetUpPreallocation_MPIAIJ, 1630 0, 1631 0, 1632 0, 1633 0, 1634 /*35*/ MatDuplicate_MPIAIJ, 1635 0, 1636 0, 1637 0, 1638 0, 1639 /*40*/ MatAXPY_MPIAIJ, 1640 MatGetSubMatrices_MPIAIJ, 1641 MatIncreaseOverlap_MPIAIJ, 1642 MatGetValues_MPIAIJ, 1643 MatCopy_MPIAIJ, 1644 /*45*/ MatPrintHelp_MPIAIJ, 1645 MatScale_MPIAIJ, 1646 0, 1647 0, 1648 0, 1649 /*50*/ MatSetBlockSize_MPIAIJ, 1650 0, 1651 0, 1652 0, 1653 0, 1654 /*55*/ MatFDColoringCreate_MPIAIJ, 1655 0, 1656 MatSetUnfactored_MPIAIJ, 1657 0, 1658 0, 1659 /*60*/ MatGetSubMatrix_MPIAIJ, 1660 MatDestroy_MPIAIJ, 1661 MatView_MPIAIJ, 1662 MatGetPetscMaps_Petsc, 1663 0, 1664 /*65*/ 0, 1665 0, 1666 0, 1667 0, 1668 0, 1669 /*70*/ 0, 1670 0, 1671 MatSetColoring_MPIAIJ, 1672 #if defined(PETSC_HAVE_ADIC) 1673 MatSetValuesAdic_MPIAIJ, 1674 #else 1675 0, 1676 #endif 1677 MatSetValuesAdifor_MPIAIJ, 1678 /*75*/ 0, 1679 0, 1680 0, 1681 0, 1682 0, 1683 /*80*/ 0, 1684 0, 1685 0, 1686 0, 1687 /*84*/ MatLoad_MPIAIJ, 1688 0, 1689 0, 1690 0, 1691 0, 1692 0, 1693 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1694 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1695 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1696 MatPtAP_Basic, 1697 MatPtAPSymbolic_MPIAIJ, 1698 /*95*/ MatPtAPNumeric_MPIAIJ, 1699 0, 1700 0, 1701 0, 1702 0, 1703 /*100*/0, 1704 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1705 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1706 MatConjugate_MPIAIJ 1707 }; 1708 1709 /* ----------------------------------------------------------------------------------------*/ 1710 1711 EXTERN_C_BEGIN 1712 #undef __FUNCT__ 1713 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1714 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1715 { 1716 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1717 PetscErrorCode ierr; 1718 1719 PetscFunctionBegin; 1720 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1721 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1722 PetscFunctionReturn(0); 1723 } 1724 EXTERN_C_END 1725 1726 EXTERN_C_BEGIN 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1729 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1730 { 1731 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1732 PetscErrorCode ierr; 1733 1734 PetscFunctionBegin; 1735 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1736 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1737 PetscFunctionReturn(0); 1738 } 1739 EXTERN_C_END 1740 1741 #include "petscpc.h" 1742 EXTERN_C_BEGIN 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1745 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1746 { 1747 Mat_MPIAIJ *b; 1748 PetscErrorCode ierr; 1749 PetscInt i; 1750 1751 PetscFunctionBegin; 1752 B->preallocated = PETSC_TRUE; 1753 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1754 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1755 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1756 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1757 if (d_nnz) { 1758 for (i=0; i<B->m; i++) { 1759 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1760 } 1761 } 1762 if (o_nnz) { 1763 for (i=0; i<B->m; i++) { 1764 if (o_nnz[i] < 0) 
SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1765 } 1766 } 1767 b = (Mat_MPIAIJ*)B->data; 1768 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1769 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1770 1771 PetscFunctionReturn(0); 1772 } 1773 EXTERN_C_END 1774 1775 #undef __FUNCT__ 1776 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1777 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1778 { 1779 Mat mat; 1780 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1781 PetscErrorCode ierr; 1782 1783 PetscFunctionBegin; 1784 *newmat = 0; 1785 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1786 ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr); 1787 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1788 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1789 a = (Mat_MPIAIJ*)mat->data; 1790 1791 mat->factor = matin->factor; 1792 mat->bs = matin->bs; 1793 mat->assembled = PETSC_TRUE; 1794 mat->insertmode = NOT_SET_VALUES; 1795 mat->preallocated = PETSC_TRUE; 1796 1797 a->rstart = oldmat->rstart; 1798 a->rend = oldmat->rend; 1799 a->cstart = oldmat->cstart; 1800 a->cend = oldmat->cend; 1801 a->size = oldmat->size; 1802 a->rank = oldmat->rank; 1803 a->donotstash = oldmat->donotstash; 1804 a->roworiented = oldmat->roworiented; 1805 a->rowindices = 0; 1806 a->rowvalues = 0; 1807 a->getrowactive = PETSC_FALSE; 1808 1809 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1810 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1811 if (oldmat->colmap) { 1812 #if defined (PETSC_USE_CTABLE) 1813 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1814 #else 1815 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1816 ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1817 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1818 #endif 1819 } else a->colmap = 0; 1820 if (oldmat->garray) { 1821 PetscInt len; 1822 len = oldmat->B->n; 1823 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1824 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1825 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1826 } else a->garray = 0; 1827 1828 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1829 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1830 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1831 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1832 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1833 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1834 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1835 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1836 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1837 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1838 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1839 *newmat = mat; 1840 PetscFunctionReturn(0); 1841 } 1842 1843 #include "petscsys.h" 1844 1845 #undef __FUNCT__ 1846 #define __FUNCT__ "MatLoad_MPIAIJ" 1847 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1848 { 1849 Mat A; 1850 PetscScalar *vals,*svals; 1851 MPI_Comm comm = ((PetscObject)viewer)->comm; 1852 MPI_Status status; 1853 PetscErrorCode ierr; 1854 PetscMPIInt rank,size,tag = 
((PetscObject)viewer)->tag,maxnz; 1855 PetscInt i,nz,j,rstart,rend,mmax; 1856 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1857 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1858 PetscInt cend,cstart,n,*rowners; 1859 int fd; 1860 1861 PetscFunctionBegin; 1862 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1863 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1864 if (!rank) { 1865 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1866 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1867 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object"); 1868 } 1869 1870 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1871 M = header[1]; N = header[2]; 1872 /* determine ownership of all rows */ 1873 m = M/size + ((M % size) > rank); 1874 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1875 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1876 1877 /* the first process needs enough room for the process with the most rows */ 1878 if (!rank) { 1879 mmax = rowners[1]; 1880 for (i=2; i<=size; i++) { 1881 mmax = PetscMax(mmax,rowners[i]); 1882 } 1883 } else mmax = m; 1884 1885 /* convert the per-process row counts into a prefix sum giving the ownership ranges */ 1886 rowners[0] = 0; 1887 for (i=2; i<=size; i++) { 1888 rowners[i] += rowners[i-1]; 1889 } 1890 rstart = rowners[rank]; 1891 rend = rowners[rank+1]; 1892 1893 /* distribute row lengths to all processors */ 1894 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 1895 if (!rank) { 1896 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1897 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1898 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1899 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1900 for (j=0; j<m; j++) { 1901 procsnz[0] += ourlens[j]; 1902 } 1903 for (i=1; i<size; i++) { 1904 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1905 /* calculate the number of nonzeros on each processor */ 1906 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1907 procsnz[i] += rowlengths[j]; 1908 } 1909 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1910 } 1911 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1912 } else { 1913 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1914 } 1915 1916 if (!rank) { 1917 /* determine max buffer needed and allocate it */ 1918 maxnz = 0; 1919 for (i=0; i<size; i++) { 1920 maxnz = PetscMax(maxnz,procsnz[i]); 1921 } 1922 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1923 1924 /* read in my part of the matrix column indices */ 1925 nz = procsnz[0]; 1926 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1927 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1928 1929 /* read in everyone else's and ship off */ 1930 for (i=1; i<size; i++) { 1931 nz = procsnz[i]; 1932 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1933 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1934 } 1935 ierr = PetscFree(cols);CHKERRQ(ierr); 1936 } else { 1937 /* determine buffer space needed for message */ 1938 nz = 0; 1939 for (i=0; i<m; i++) { 1940 nz += ourlens[i]; 1941 } 1942 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1943 1944 /* receive message of column indices */ 1945 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1946 ierr = 
MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1947 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file"); 1948 } 1949 1950 /* determine column ownership if matrix is not square */ 1951 if (N != M) { 1952 n = N/size + ((N % size) > rank); 1953 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1954 cstart = cend - n; 1955 } else { 1956 cstart = rstart; 1957 cend = rend; 1958 n = cend - cstart; 1959 } 1960 1961 /* loop over local rows, determining number of off-diagonal entries */ 1962 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1963 jj = 0; 1964 for (i=0; i<m; i++) { 1965 for (j=0; j<ourlens[i]; j++) { 1966 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1967 jj++; 1968 } 1969 } 1970 1971 /* create our matrix */ 1972 for (i=0; i<m; i++) { 1973 ourlens[i] -= offlens[i]; 1974 } 1975 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 1976 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 1977 ierr = MatSetType(A,type);CHKERRQ(ierr); 1978 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 1979 1980 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1981 for (i=0; i<m; i++) { 1982 ourlens[i] += offlens[i]; 1983 } 1984 1985 if (!rank) { 1986 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1987 1988 /* read in my part of the matrix numerical values */ 1989 nz = procsnz[0]; 1990 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1991 1992 /* insert into matrix */ 1993 jj = rstart; 1994 smycols = mycols; 1995 svals = vals; 1996 for (i=0; i<m; i++) { 1997 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1998 smycols += ourlens[i]; 1999 svals += ourlens[i]; 2000 jj++; 2001 } 2002 2003 /* read in other processors and ship out */ 2004 for (i=1; i<size; i++) { 2005 nz = procsnz[i]; 2006 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2007 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2008 } 2009 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2010 } else { 2011 /* receive numeric values */ 2012 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2013 2014 /* receive message of values */ 2015 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2016 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2017 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file"); 2018 2019 /* insert into matrix */ 2020 jj = rstart; 2021 smycols = mycols; 2022 svals = vals; 2023 for (i=0; i<m; i++) { 2024 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2025 smycols += ourlens[i]; 2026 svals += ourlens[i]; 2027 jj++; 2028 } 2029 } 2030 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2031 ierr = PetscFree(vals);CHKERRQ(ierr); 2032 ierr = PetscFree(mycols);CHKERRQ(ierr); 2033 ierr = PetscFree(rowners);CHKERRQ(ierr); 2034 2035 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2036 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2037 *newmat = A; 2038 PetscFunctionReturn(0); 2039 } 2040 2041 #undef __FUNCT__ 2042 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2043 /* 2044 Not great since it makes two copies of the submatrix: first a SeqAIJ matrix on each process, 2045 and then the final result obtained by concatenating the local matrices. 
2046 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2047 */ 2048 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2049 { 2050 PetscErrorCode ierr; 2051 PetscMPIInt rank,size; 2052 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2053 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2054 Mat *local,M,Mreuse; 2055 PetscScalar *vwork,*aa; 2056 MPI_Comm comm = mat->comm; 2057 Mat_SeqAIJ *aij; 2058 2059 2060 PetscFunctionBegin; 2061 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2062 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2063 2064 if (call == MAT_REUSE_MATRIX) { 2065 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2066 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2067 local = &Mreuse; 2068 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2069 } else { 2070 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2071 Mreuse = *local; 2072 ierr = PetscFree(local);CHKERRQ(ierr); 2073 } 2074 2075 /* 2076 m - number of local rows 2077 n - number of columns (same on all processors) 2078 rstart - first row in new global matrix generated 2079 */ 2080 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2081 if (call == MAT_INITIAL_MATRIX) { 2082 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2083 ii = aij->i; 2084 jj = aij->j; 2085 2086 /* 2087 Determine the number of non-zeros in the diagonal and off-diagonal 2088 portions of the matrix in order to do correct preallocation 2089 */ 2090 2091 /* first get start and end of "diagonal" columns */ 2092 if (csize == PETSC_DECIDE) { 2093 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2094 if (mglobal == n) { /* square matrix */ 2095 nlocal = m; 2096 } else { 2097 nlocal = n/size + ((n % size) > rank); 2098 } 2099 } else { 2100 nlocal = csize; 2101 } 2102 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2103 rstart = rend - nlocal; 2104 if (rank == size - 1 && rend != n) { 2105 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2106 } 2107 2108 /* next, compute all the lengths */ 2109 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2110 olens = dlens + m; 2111 for (i=0; i<m; i++) { 2112 jend = ii[i+1] - ii[i]; 2113 olen = 0; 2114 dlen = 0; 2115 for (j=0; j<jend; j++) { 2116 if (*jj < rstart || *jj >= rend) olen++; 2117 else dlen++; 2118 jj++; 2119 } 2120 olens[i] = olen; 2121 dlens[i] = dlen; 2122 } 2123 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2124 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2125 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2126 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2127 ierr = PetscFree(dlens);CHKERRQ(ierr); 2128 } else { 2129 PetscInt ml,nl; 2130 2131 M = *newmat; 2132 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2133 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2134 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2135 /* 2136 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2137 rather than the slower MatSetValues(). 
2138 */ 2139 M->was_assembled = PETSC_TRUE; 2140 M->assembled = PETSC_FALSE; 2141 } 2142 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2143 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2144 ii = aij->i; 2145 jj = aij->j; 2146 aa = aij->a; 2147 for (i=0; i<m; i++) { 2148 row = rstart + i; 2149 nz = ii[i+1] - ii[i]; 2150 cwork = jj; jj += nz; 2151 vwork = aa; aa += nz; 2152 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2153 } 2154 2155 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2156 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2157 *newmat = M; 2158 2159 /* save the submatrix used on this processor for the next reuse request */ 2160 if (call == MAT_INITIAL_MATRIX) { 2161 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2162 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2163 } 2164 2165 PetscFunctionReturn(0); 2166 } 2167 2168 EXTERN_C_BEGIN 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2171 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2172 { 2173 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2174 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2175 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2176 const PetscInt *JJ; 2177 PetscScalar *values; 2178 PetscErrorCode ierr; 2179 2180 PetscFunctionBegin; 2181 #if defined(PETSC_OPT_g) 2182 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]); 2183 #endif 2184 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2185 o_nnz = d_nnz + m; 2186 2187 for (i=0; i<m; i++) { 2188 nnz = I[i+1]- I[i]; 2189 JJ = J + I[i]; 2190 nnz_max = PetscMax(nnz_max,nnz); 2191 #if defined(PETSC_OPT_g) 2192 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz); 2193 #endif 2194 for (j=0; j<nnz; j++) { 2195 if (*JJ >= cstart) break; 2196 JJ++; 2197 } 2198 d = 0; 2199 for (; j<nnz; j++) { 2200 if (*JJ++ >= cend) break; 2201 d++; 2202 } 2203 d_nnz[i] = d; 2204 o_nnz[i] = nnz - d; 2205 } 2206 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2207 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2208 2209 if (v) values = (PetscScalar*)v; 2210 else { 2211 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2212 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2213 } 2214 2215 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2216 for (i=0; i<m; i++) { 2217 ii = i + rstart; 2218 nnz = I[i+1]- I[i]; 2219 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2220 } 2221 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2222 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2223 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2224 2225 if (!v) { 2226 ierr = PetscFree(values);CHKERRQ(ierr); 2227 } 2228 PetscFunctionReturn(0); 2229 } 2230 EXTERN_C_END 2231 2232 #undef __FUNCT__ 2233 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2234 /*@C 2235 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2236 (the default parallel PETSc format). 2237 2238 Collective on MPI_Comm 2239 2240 Input Parameters: 2241 + B - the matrix 2242 . i - the indices into j for the start of each local row (starts with zero) 2243 . 
j - the column indices for each local row (starts with zero); these must be sorted for each row 2244 - v - optional values in the matrix 2245 2246 Level: developer 2247 2248 .keywords: matrix, aij, compressed row, sparse, parallel 2249 2250 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2251 @*/ 2252 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2253 { 2254 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2255 2256 PetscFunctionBegin; 2257 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2258 if (f) { 2259 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2260 } 2261 PetscFunctionReturn(0); 2262 } 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2266 /*@C 2267 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2268 (the default parallel PETSc format). For good matrix assembly performance 2269 the user should preallocate the matrix storage by setting the parameters 2270 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2271 performance can be increased by more than a factor of 50. 2272 2273 Collective on MPI_Comm 2274 2275 Input Parameters: 2276 + B - the matrix 2277 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2278 (same value is used for all local rows) 2279 . d_nnz - array containing the number of nonzeros in the various rows of the 2280 DIAGONAL portion of the local submatrix (possibly different for each row) 2281 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2282 The size of this array is equal to the number of local rows, i.e., 'm'. 2283 You must leave room for the diagonal entry even if it is zero. 2284 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2285 submatrix (same value is used for all local rows). 2286 - o_nnz - array containing the number of nonzeros in the various rows of the 2287 OFF-DIAGONAL portion of the local submatrix (possibly different for 2288 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2289 structure. The size of this array is equal to the number 2290 of local rows, i.e., 'm'. 2291 2292 If the *_nnz parameter is given then the *_nz parameter is ignored. 2293 2294 The AIJ format (also called the Yale sparse matrix format or 2295 compressed row storage (CSR)) is fully compatible with standard Fortran 77 2296 storage. The stored row and column indices begin with zero. See the users manual for details. 2297 2298 The parallel matrix is partitioned such that the first m0 rows belong to 2299 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2300 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2301 2302 The DIAGONAL portion of the local submatrix of a processor can be defined 2303 as the submatrix which is obtained by extracting the part corresponding 2304 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2305 first row that belongs to the processor, and r2 is the last row belonging 2306 to this processor. This is a square mxm matrix. The remaining portion 2307 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2308 2309 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
 2310 2311 Example usage: 2312 2313 Consider the following 8x8 matrix with 34 nonzero values that is 2314 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2315 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2316 as follows: 2317 2318 .vb 2319 1 2 0 | 0 3 0 | 0 4 2320 Proc0 0 5 6 | 7 0 0 | 8 0 2321 9 0 10 | 11 0 0 | 12 0 2322 ------------------------------------- 2323 13 0 14 | 15 16 17 | 0 0 2324 Proc1 0 18 0 | 19 20 21 | 0 0 2325 0 0 0 | 22 23 0 | 24 0 2326 ------------------------------------- 2327 Proc2 25 26 27 | 0 0 28 | 29 0 2328 30 0 0 | 31 32 33 | 0 34 2329 .ve 2330 2331 This can be represented as a collection of submatrices as: 2332 2333 .vb 2334 A B C 2335 D E F 2336 G H I 2337 .ve 2338 2339 Where the submatrices A,B,C are owned by proc0, D,E,F are 2340 owned by proc1, and G,H,I are owned by proc2. 2341 2342 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2343 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2344 The 'M','N' parameters are 8,8, and have the same values on all procs. 2345 2346 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2347 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2348 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2349 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2350 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ 2351 matrix and [DF] as another SeqAIJ matrix. 2352 2353 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2354 allocated for every row of the local diagonal submatrix, and o_nz 2355 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2356 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2357 local row in the local DIAGONAL and OFF-DIAGONAL submatrices respectively. 2358 In this case, the values of d_nz,o_nz are: 2359 .vb 2360 proc0 : d_nz = 2, o_nz = 2 2361 proc1 : d_nz = 3, o_nz = 2 2362 proc2 : d_nz = 1, o_nz = 4 2363 .ve 2364 We are allocating m*(d_nz+o_nz) storage locations on every proc. This 2365 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2366 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2367 34 values. 2368 2369 When the d_nnz, o_nnz parameters are specified, the storage is specified 2370 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2371 In the above case the values for d_nnz,o_nnz are: 2372 .vb 2373 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2374 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2375 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2376 .ve 2377 Here the space allocated is the sum of all the above values, i.e., 34, and 2378 hence the preallocation is perfect.
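 As a concrete call sketch for proc1 in the example above (a minimal illustration only; the local array names d_nnz and o_nnz are not part of the API): .vb PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1}; /* per-row counts on proc1 */ ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr); .ve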
 2379 2380 Level: intermediate 2381 2382 .keywords: matrix, aij, compressed row, sparse, parallel 2383 2384 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2385 MPIAIJ 2386 @*/ 2387 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2388 { 2389 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2390 2391 PetscFunctionBegin; 2392 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2393 if (f) { 2394 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2395 } 2396 PetscFunctionReturn(0); 2397 } 2398 2399 #undef __FUNCT__ 2400 #define __FUNCT__ "MatCreateMPIAIJ" 2401 /*@C 2402 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2403 (the default parallel PETSc format). For good matrix assembly performance 2404 the user should preallocate the matrix storage by setting the parameters 2405 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2406 performance can be increased by more than a factor of 50. 2407 2408 Collective on MPI_Comm 2409 2410 Input Parameters: 2411 + comm - MPI communicator 2412 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 2413 This value should be the same as the local size used in creating the 2414 y vector for the matrix-vector product y = Ax. 2415 . n - This value should be the same as the local size used in creating the 2416 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 2417 calculated if N is given). For square matrices n is almost always m. 2418 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 2419 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 2420 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2421 (same value is used for all local rows) 2422 . d_nnz - array containing the number of nonzeros in the various rows of the 2423 DIAGONAL portion of the local submatrix (possibly different for each row) 2424 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2425 The size of this array is equal to the number of local rows, i.e., 'm'. 2426 You must leave room for the diagonal entry even if it is zero. 2427 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2428 submatrix (same value is used for all local rows). 2429 - o_nnz - array containing the number of nonzeros in the various rows of the 2430 OFF-DIAGONAL portion of the local submatrix (possibly different for 2431 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2432 structure. The size of this array is equal to the number 2433 of local rows, i.e., 'm'. 2434 2435 Output Parameter: 2436 . A - the matrix 2437 2438 Notes: 2439 If the *_nnz parameter is given then the *_nz parameter is ignored. 2440 2441 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 2442 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2443 storage requirements for this matrix. 2444 2445 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2446 processor then it must be used on all processors that share the object for 2447 that argument. 2448 2449 The user MUST specify either the local or global matrix dimensions 2450 (possibly both).
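 For instance, a minimal creation sketch in which the caller fixes the local sizes m and n and lets PETSc compute the global sizes (the per-row estimates 5 and 2 are illustrative values only): .vb ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr); .ve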
 2451 2452 The parallel matrix is partitioned such that the first m0 rows belong to 2453 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2454 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2455 2456 The DIAGONAL portion of the local submatrix of a processor can be defined 2457 as the submatrix which is obtained by extracting the part corresponding 2458 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2459 first row that belongs to the processor, and r2 is the last row belonging 2460 to this processor. This is a square mxm matrix. The remaining portion 2461 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2462 2463 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 2464 2465 When calling this routine with a single process communicator, a matrix of 2466 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2467 type of communicator, use the construction mechanism: 2468 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2469 2470 By default, this format uses inodes (identical nodes) when possible. 2471 We search for consecutive rows with the same nonzero structure, thereby 2472 reusing matrix information to achieve increased efficiency. 2473 2474 Options Database Keys: 2475 + -mat_no_inode - Do not use inodes 2476 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2477 - -mat_aij_oneindex - Internally use indexing starting at 1 2478 rather than 0. Note that when calling MatSetValues(), 2479 the user still MUST index entries starting at 0! 2480 2481 2482 Example usage: 2483 2484 Consider the following 8x8 matrix with 34 nonzero values that is 2485 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2486 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2487 as follows: 2488 2489 .vb 2490 1 2 0 | 0 3 0 | 0 4 2491 Proc0 0 5 6 | 7 0 0 | 8 0 2492 9 0 10 | 11 0 0 | 12 0 2493 ------------------------------------- 2494 13 0 14 | 15 16 17 | 0 0 2495 Proc1 0 18 0 | 19 20 21 | 0 0 2496 0 0 0 | 22 23 0 | 24 0 2497 ------------------------------------- 2498 Proc2 25 26 27 | 0 0 28 | 29 0 2499 30 0 0 | 31 32 33 | 0 34 2500 .ve 2501 2502 This can be represented as a collection of submatrices as: 2503 2504 .vb 2505 A B C 2506 D E F 2507 G H I 2508 .ve 2509 2510 Where the submatrices A,B,C are owned by proc0, D,E,F are 2511 owned by proc1, and G,H,I are owned by proc2. 2512 2513 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2514 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2515 The 'M','N' parameters are 8,8, and have the same values on all procs. 2516 2517 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2518 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2519 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2520 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2521 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ 2522 matrix and [DF] as another SeqAIJ matrix. 2523 2524 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2525 allocated for every row of the local diagonal submatrix, and o_nz 2526 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2527 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2528 local row in the local DIAGONAL and OFF-DIAGONAL submatrices respectively. 
In this case, the values of d_nz,o_nz are: 2530 .vb 2531 proc0 : d_nz = 2, o_nz = 2 2532 proc1 : d_nz = 3, o_nz = 2 2533 proc2 : d_nz = 1, o_nz = 4 2534 .ve 2535 We are allocating m*(d_nz+o_nz) storage locations on every proc. This 2536 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2537 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2538 34 values. 2539 2540 When the d_nnz, o_nnz parameters are specified, the storage is specified 2541 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2542 In the above case the values for d_nnz,o_nnz are: 2543 .vb 2544 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2545 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2546 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2547 .ve 2548 Here the space allocated is the sum of all the above values, i.e., 34, and 2549 hence the preallocation is perfect. 2550 2551 Level: intermediate 2552 2553 .keywords: matrix, aij, compressed row, sparse, parallel 2554 2555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2556 MPIAIJ 2557 @*/ 2558 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2559 { 2560 PetscErrorCode ierr; 2561 PetscMPIInt size; 2562 2563 PetscFunctionBegin; 2564 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2565 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2566 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2567 if (size > 1) { 2568 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2569 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2570 } else { 2571 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2572 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2573 } 2574 PetscFunctionReturn(0); 2575 } 2576 2577 #undef __FUNCT__ 2578 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2579 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2580 { 2581 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2582 2583 PetscFunctionBegin; 2584 *Ad = a->A; 2585 *Ao = a->B; 2586 *colmap = a->garray; 2587 PetscFunctionReturn(0); 2588 } 2589 2590 #undef __FUNCT__ 2591 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2592 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2593 { 2594 PetscErrorCode ierr; 2595 PetscInt i; 2596 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2597 2598 PetscFunctionBegin; 2599 if (coloring->ctype == IS_COLORING_LOCAL) { 2600 ISColoringValue *allcolors,*colors; 2601 ISColoring ocoloring; 2602 2603 /* set coloring for diagonal portion */ 2604 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2605 2606 /* set coloring for off-diagonal portion */ 2607 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2608 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2609 for (i=0; i<a->B->n; i++) { 2610 colors[i] = allcolors[a->garray[i]]; 2611 } 2612 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2613 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2614 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2615 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2616 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2617 ISColoringValue *colors; 2618 PetscInt *larray; 2619 ISColoring ocoloring; 2620 2621 /* set coloring for diagonal portion */ 2622 ierr = 
PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2623 for (i=0; i<a->A->n; i++) { 2624 larray[i] = i + a->cstart; 2625 } 2626 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2627 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2628 for (i=0; i<a->A->n; i++) { 2629 colors[i] = coloring->colors[larray[i]]; 2630 } 2631 ierr = PetscFree(larray);CHKERRQ(ierr); 2632 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2633 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2634 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2635 2636 /* set coloring for off-diagonal portion */ 2637 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2638 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2639 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2640 for (i=0; i<a->B->n; i++) { 2641 colors[i] = coloring->colors[larray[i]]; 2642 } 2643 ierr = PetscFree(larray);CHKERRQ(ierr); 2644 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2645 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2646 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2647 } else { 2648 SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype); 2649 } 2650 2651 PetscFunctionReturn(0); 2652 } 2653 2654 #if defined(PETSC_HAVE_ADIC) 2655 #undef __FUNCT__ 2656 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2657 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2658 { 2659 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2660 PetscErrorCode ierr; 2661 2662 PetscFunctionBegin; 2663 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2664 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2665 PetscFunctionReturn(0); 2666 } 2667 #endif 2668 2669 #undef __FUNCT__ 2670 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2671 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2672 { 2673 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2674 PetscErrorCode ierr; 2675 2676 PetscFunctionBegin; 2677 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2678 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2679 PetscFunctionReturn(0); 2680 } 2681 2682 #undef __FUNCT__ 2683 #define __FUNCT__ "MatMerge" 2684 /*@C 2685 MatMerge - Creates a single large PETSc matrix by concatenating sequential 2686 matrices from each processor 2687 2688 Collective on MPI_Comm 2689 2690 Input Parameters: 2691 + comm - the communicator the parallel matrix will live on 2692 . inmat - the input sequential matrix on each process 2693 . n - number of local columns (or PETSC_DECIDE) 2694 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2695 2696 Output Parameter: 2697 . outmat - the parallel matrix generated 2698 2699 Level: advanced 2700 2701 Notes: The number of columns of the matrix in EACH processor MUST be the same.
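 A minimal usage sketch (inmat is the sequential matrix owned by each process; note that the routine destroys inmat once it has been merged into outmat): .vb Mat outmat; ierr = MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr); .ve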
2702 2703 @*/ 2704 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2705 { 2706 PetscErrorCode ierr; 2707 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2708 PetscInt *indx; 2709 PetscScalar *values; 2710 PetscMap columnmap,rowmap; 2711 2712 PetscFunctionBegin; 2713 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2714 /* 2715 PetscMPIInt rank; 2716 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2717 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2718 */ 2719 if (scall == MAT_INITIAL_MATRIX){ 2720 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2721 if (n == PETSC_DECIDE){ 2722 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2723 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2724 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2725 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2726 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2727 } 2728 2729 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2730 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2731 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2732 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2733 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2734 2735 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2736 for (i=0;i<m;i++) { 2737 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2738 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2739 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2740 } 2741 /* This routine will ONLY return MPIAIJ type matrix */ 2742 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2743 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2744 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2745 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2746 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2747 2748 } else if (scall == MAT_REUSE_MATRIX){ 2749 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2750 } else { 2751 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2752 } 2753 2754 for (i=0;i<m;i++) { 2755 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2756 I = i + rstart; 2757 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2758 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2759 } 2760 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2761 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2762 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2763 2764 PetscFunctionReturn(0); 2765 } 2766 2767 #undef __FUNCT__ 2768 #define __FUNCT__ "MatFileSplit" 2769 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2770 { 2771 PetscErrorCode ierr; 2772 PetscMPIInt rank; 2773 PetscInt m,N,i,rstart,nnz; 2774 size_t len; 2775 const PetscInt *indx; 2776 PetscViewer out; 2777 char *name; 2778 Mat B; 2779 const PetscScalar *values; 2780 2781 PetscFunctionBegin; 2782 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2783 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2784 /* Should this be the type of the diagonal block of A? 
*/ 2785 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2786 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 2787 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2788 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2789 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2790 for (i=0;i<m;i++) { 2791 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2792 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2793 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2794 } 2795 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2796 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2797 2798 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2799 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2800 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2801 sprintf(name,"%s.%d",outfile,rank); 2802 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2803 ierr = PetscFree(name);CHKERRQ(ierr); 2804 ierr = MatView(B,out);CHKERRQ(ierr); 2805 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2806 ierr = MatDestroy(B);CHKERRQ(ierr); 2807 PetscFunctionReturn(0); 2808 } 2809 2810 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2811 #undef __FUNCT__ 2812 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2813 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2814 { 2815 PetscErrorCode ierr; 2816 Mat_Merge_SeqsToMPI *merge; 2817 PetscObjectContainer container; 2818 2819 PetscFunctionBegin; 2820 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2821 if (container) { 2822 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2823 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2824 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2825 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2826 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2827 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2828 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2829 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2830 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2831 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2832 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2833 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2834 2835 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2836 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2837 /* merge is only valid when the container was attached, so it must be freed here, inside the check */ 2838 ierr = PetscFree(merge);CHKERRQ(ierr); 2839 } 2840 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2841 PetscFunctionReturn(0); 2842 } 2843 2844 #include "src/mat/utils/freespace.h" 2845 #include "petscbt.h" 2846 static PetscEvent logkey_seqstompinum = 0; 2847 #undef __FUNCT__ 2848 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2849 /*@C 2850 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together sequential 2851 matrices from each processor 2852 2853 Collective on MPI_Comm 2854 2855 Input Parameters: 2856 + comm - the communicator the parallel matrix will live on 2857 . seqmat - the input sequential matrix on each process 2858 . m - number of local rows (or PETSC_DECIDE) 2859 . n - number of local columns (or PETSC_DECIDE) 2860 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2861 2862 Output Parameter: 2863 . mpimat - the parallel matrix generated 2864 2865 Level: advanced 2866 2867 Notes: 2868 The dimensions of the sequential matrix in each processor MUST be the same.
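 The intended calling sequence is the symbolic phase followed by the numeric phase, as in this sketch built from the two routines in this file: .vb Mat mpimat; ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr); ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr); .ve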
 2869 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2870 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2871 @*/ 2872 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2873 { 2874 PetscErrorCode ierr; 2875 MPI_Comm comm=mpimat->comm; 2876 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2877 PetscMPIInt size,rank,taga,*len_s; 2878 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2879 PetscInt proc,m; 2880 PetscInt **buf_ri,**buf_rj; 2881 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2882 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2883 MPI_Request *s_waits,*r_waits; 2884 MPI_Status *status; 2885 MatScalar *aa=a->a,**abuf_r,*ba_i; 2886 Mat_Merge_SeqsToMPI *merge; 2887 PetscObjectContainer container; 2888 2889 PetscFunctionBegin; 2890 if (!logkey_seqstompinum) { 2891 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr); 2892 } 2893 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2894 2895 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2896 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2897 2898 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2899 if (container) { 2900 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2901 } else { SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat was not created by MatMerge_SeqsToMPISymbolic()"); } 2902 bi = merge->bi; 2903 bj = merge->bj; 2904 buf_ri = merge->buf_ri; 2905 buf_rj = merge->buf_rj; 2906 2907 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2908 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2909 len_s = merge->len_s; 2910 2911 /* send and recv matrix values */ 2912 /*-----------------------------*/ 2913 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2914 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2915 2916 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2917 for (proc=0,k=0; proc<size; proc++){ 2918 if (!len_s[proc]) continue; 2919 i = owners[proc]; 2920 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2921 k++; 2922 } 2923 2924 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 2925 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 2926 ierr = PetscFree(status);CHKERRQ(ierr); 2927 2928 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2929 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2930 2931 /* insert mat values of mpimat */ 2932 /*----------------------------*/ 2933 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2934 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2935 nextrow = buf_ri_k + merge->nrecv; 2936 nextai = nextrow + merge->nrecv; 2937 2938 for (k=0; k<merge->nrecv; k++){ 2939 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 2940 nrows = *(buf_ri_k[k]); 2941 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 2942 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 2943 } 2944 2945 /* set values of ba */ 2946 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2947 for (i=0; i<m; i++) { 2948 arow = owners[rank] + i; 2949 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2950 bnzi = bi[i+1] - bi[i]; 2951 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 
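 /* ba_i now holds a zeroed buffer with one slot per symbolic nonzero of row i; the local and received contributions below are accumulated into it by walking the sorted column lists against bj_i in step (a two-pointer merge) */ 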
2952 2953 /* add local non-zero vals of this proc's seqmat into ba */ 2954 anzi = ai[arow+1] - ai[arow]; 2955 aj = a->j + ai[arow]; 2956 aa = a->a + ai[arow]; 2957 nextaj = 0; 2958 for (j=0; nextaj<anzi; j++){ 2959 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2960 ba_i[j] += aa[nextaj++]; 2961 } 2962 } 2963 2964 /* add received vals into ba */ 2965 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2966 /* i-th row */ 2967 if (i == *nextrow[k]) { 2968 anzi = *(nextai[k]+1) - *nextai[k]; 2969 aj = buf_rj[k] + *(nextai[k]); 2970 aa = abuf_r[k] + *(nextai[k]); 2971 nextaj = 0; 2972 for (j=0; nextaj<anzi; j++){ 2973 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2974 ba_i[j] += aa[nextaj++]; 2975 } 2976 } 2977 nextrow[k]++; nextai[k]++; 2978 } 2979 } 2980 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 2981 } 2982 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2983 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2984 2985 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 2986 ierr = PetscFree(ba_i);CHKERRQ(ierr); 2987 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 2988 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2989 PetscFunctionReturn(0); 2990 } 2991 2992 static PetscEvent logkey_seqstompisym = 0; 2993 #undef __FUNCT__ 2994 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 2995 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 2996 { 2997 PetscErrorCode ierr; 2998 Mat B_mpi; 2999 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3000 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3001 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3002 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3003 PetscInt len,proc,*dnz,*onz; 3004 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3005 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3006 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3007 MPI_Status *status; 3008 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3009 PetscBT lnkbt; 3010 Mat_Merge_SeqsToMPI *merge; 3011 PetscObjectContainer container; 3012 3013 PetscFunctionBegin; 3014 if (!logkey_seqstompisym) { 3015 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3016 } 3017 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3018 3019 /* make sure it is a PETSc comm */ 3020 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3021 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3022 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3023 3024 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3025 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3026 3027 /* determine row ownership */ 3028 /*---------------------------------------------------------*/ 3029 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3030 if (m == PETSC_DECIDE) { 3031 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3032 } else { 3033 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3034 } 3035 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3036 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3037 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3038 3039 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3040 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3041 3042 /* determine the 
number of messages to send, their lengths */ 3043 /*---------------------------------------------------------*/ 3044 len_s = merge->len_s; 3045 3046 len = 0; /* length of buf_si[] */ 3047 merge->nsend = 0; 3048 for (proc=0; proc<size; proc++){ 3049 len_si[proc] = 0; 3050 if (proc == rank){ 3051 len_s[proc] = 0; 3052 } else { 3053 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3054 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3055 } 3056 if (len_s[proc]) { 3057 merge->nsend++; 3058 nrows = 0; 3059 for (i=owners[proc]; i<owners[proc+1]; i++){ 3060 if (ai[i+1] > ai[i]) nrows++; 3061 } 3062 len_si[proc] = 2*(nrows+1); 3063 len += len_si[proc]; 3064 } 3065 } 3066 3067 /* determine the number and length of messages to receive for ij-structure */ 3068 /*-------------------------------------------------------------------------*/ 3069 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3070 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3071 3072 /* post the Irecv of j-structure */ 3073 /*-------------------------------*/ 3074 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3075 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3076 3077 /* post the Isend of j-structure */ 3078 /*--------------------------------*/ 3079 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3080 sj_waits = si_waits + merge->nsend; 3081 3082 for (proc=0, k=0; proc<size; proc++){ 3083 if (!len_s[proc]) continue; 3084 i = owners[proc]; 3085 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3086 k++; 3087 } 3088 3089 /* receives and sends of j-structure are complete */ 3090 /*------------------------------------------------*/ 3091 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3092 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3093 3094 /* send and recv i-structure */ 3095 /*---------------------------*/ 3096 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3097 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3098 3099 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3100 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3101 for (proc=0,k=0; proc<size; proc++){ 3102 if (!len_s[proc]) continue; 3103 /* form outgoing message for i-structure: 3104 buf_si[0]: nrows to be sent 3105 [1:nrows]: row index (global) 3106 [nrows+1:2*nrows+1]: i-structure index 3107 */ 3108 /*-------------------------------------------*/ 3109 nrows = len_si[proc]/2 - 1; 3110 buf_si_i = buf_si + nrows+1; 3111 buf_si[0] = nrows; 3112 buf_si_i[0] = 0; 3113 nrows = 0; 3114 for (i=owners[proc]; i<owners[proc+1]; i++){ 3115 anzi = ai[i+1] - ai[i]; 3116 if (anzi) { 3117 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3118 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3119 nrows++; 3120 } 3121 } 3122 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3123 k++; 3124 buf_si += len_si[proc]; 3125 } 3126 3127 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3128 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3129 3130 ierr = 
  ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local nonzero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* = max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_seqstompi = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!logkey_seqstompi) {
    ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
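
/* Usage sketch (illustrative; seqA, m and n stand in for the caller's
   per-process SeqAIJ matrix and the desired local sizes):

     Mat C;
     ierr = MatMerge_SeqsToMPI(comm,seqA,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... change the numerical values (but not the pattern) of seqA ...
     ierr = MatMerge_SeqsToMPI(comm,seqA,m,n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);

   The first call does the symbolic merge and attaches the supporting struct
   to C; the second reuses that struct and redoes only the numeric assembly
   via MatMerge_SeqsToMPINumeric(). */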

static PetscEvent logkey_getlocalmat = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@C
     MatGetLocalMat - Creates a SeqAIJ matrix from all of the local rows of a parallel matrix

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  PetscScalar    *aa=a->a,*ba=b->a,*ca;
  PetscInt       am=A->m,i,j,k,cstart=mpimat->cstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  if (!logkey_getlocalmat) {
    ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A: columns left of the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat           = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->freedata = PETSC_TRUE;
    mat->nonew    = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; ca = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A: columns left of the diagonal block */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *ca++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *ca++ = *aa++;
      /* off-diagonal portion of A: columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        *ca++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
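
/* Usage sketch (illustrative; assumes A is an assembled MPIAIJ matrix):

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... use A_loc; it has A->m rows and A->N columns ...
     ... after A's values change (same nonzero pattern): ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);

   The reuse path only recopies the numerical values into the existing
   structure of A_loc. */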

static PetscEvent logkey_getlocalmatcondensed = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows of a parallel matrix, keeping only its nonzero columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  if (!logkey_getlocalmatcondensed) {
    ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = a->rstart; end = a->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    /* merge the (sorted) off-diagonal columns below cstart, the diagonal
       block, then the remaining off-diagonal columns */
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
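
/* Illustration (hypothetical numbers): in MatGetLocalMatCondensed() above,
   with cstart=4, three diagonal-block columns {4,5,6} (nzA=3) and
   garray={1,3,9}, the default column IS is built as

       idx = {1, 3,  4, 5, 6,  9},   imark = 2

   i.e. exactly the global columns that occur in the local rows, in
   increasing order. */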

static PetscEvent logkey_GetBrowsOfAcols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
.    brstart - row index of B_seq from which the next B->m rows are taken from B's local rows
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
  }
  if (!logkey_GetBrowsOfAcols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
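
/* Usage sketch (illustrative; assumes A and B are MPIAIJ matrices whose
   layouts are compatible as in the product C = A*B):

     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... B's numerical values change, same nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);

   On the reuse call rowb and colb must hold the index sets returned by the
   initial call; rows [brstart, brstart+B->m) of B_seq are B's local rows. */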

static PetscEvent logkey_GetBrowsOfAocols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero
    columns of the off-diagonal portion of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.    startsj - starting points in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
-    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)

   Output Parameter:
.    B_oth - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=ctx->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
  PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             *sstatus,rstatus;
  PetscInt               *cols;
  PetscScalar            *vals;
  PetscMPIInt            j;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
  }
  if (!logkey_GetBrowsOfAocols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
  svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;
  rwaits   = gen_from->requests;
  swaits   = gen_to->requests;
  srow     = gen_to->indices;  /* local row indices to be sent */
  rstarts  = gen_from->starts;
  sstarts  = gen_to->starts;
  rprocs   = gen_from->procs;
  sprocs   = gen_to->procs;
  sstatus  = gen_to->sstatus;

  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing messages */
    ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
    rstartsj    = sstartsj + nsends + 1;
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k   = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i];
      nrows  = sstarts[i+1]-sstarts[i]; /* num of rows */
      for (j=0; j<nrows; j++) {
        row  = srow[k] + b->rowners[rank]; /* global row idx */
        ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* row length */
        len += rowlen[j];
        ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len; /* starting point of the (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of the (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row  = srow[k++] + b->rowners[rank]; /* global row idx */
        ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufJ++ = cols[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends + 1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i];
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row  = srow[k++] + b->rowners[rank]; /* global row idx */
      ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      for (l=0; l<ncols; l++){
        *bufA++ = vals[l];
      }
      ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth           = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->freedata = PETSC_TRUE;
    b_oth->nonew    = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/
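
/* Usage sketch (illustrative; m,n,M,N,d_nz,o_nz stand in for the caller's
   local/global sizes and per-row preallocation estimates):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);

   or select the type at runtime with -mat_type mpiaij together with
   MatSetFromOptions(). */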

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr    = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor    = 0;
  B->bs        = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache (stash) for off-processor entries */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix-vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* Explicitly create 2 MATSEQAIJ matrices: b->A holds the diagonal block,
     b->B the off-diagonal block (created here with the full column width B->N;
     its column space is compressed during assembly). */
  ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
  ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END