#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij =
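/*
   Illustrative usage sketch for the MATAIJ man page above: call both preallocation routines so the
   same code runs with one or many processes, as recommended there.  The communicator comm, the global
   size N, and the per-row nonzero estimates (5 for the diagonal block, 2 for the off-diagonal block)
   are placeholders, not values taken from this file.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ... MatSetValues() loop, then ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/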
(Mat_MPIAIJ*)M->data; 111 PetscErrorCode ierr; 112 PetscInt i,rstart,nrows,*rows; 113 114 PetscFunctionBegin; 115 *zrows = NULL; 116 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 117 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 118 for (i=0; i<nrows; i++) rows[i] += rstart; 119 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 125 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 126 { 127 PetscErrorCode ierr; 128 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 129 PetscInt i,n,*garray = aij->garray; 130 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 131 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 132 PetscReal *work; 133 134 PetscFunctionBegin; 135 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 136 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 137 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 138 if (type == NORM_2) { 139 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 140 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 141 } 142 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 143 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 144 } 145 } else if (type == NORM_1) { 146 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 147 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 148 } 149 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 150 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 151 } 152 } else if (type == NORM_INFINITY) { 153 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 154 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 155 } 156 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 157 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 158 } 159 160 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 161 if (type == NORM_INFINITY) { 162 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 163 } else { 164 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 165 } 166 ierr = PetscFree(work);CHKERRQ(ierr); 167 if (type == NORM_2) { 168 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatDistribute_MPIAIJ" 175 /* 176 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 177 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 if (!rank) { 206 bses[0] = gmat->rmap->bs; 207 bses[1] = gmat->cmap->bs; 208 } 209 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 210 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 211 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 212 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 213 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 214 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 215 216 rowners[0] = 0; 217 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 218 rstart = rowners[rank]; 219 rend = rowners[rank+1]; 220 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 221 if (!rank) { 222 gmata = (Mat_SeqAIJ*) gmat->data; 223 /* send row lengths to all processors */ 224 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 225 for (i=1; i<size; i++) { 226 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 227 } 228 /* determine number diagonal and off-diagonal counts */ 229 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 230 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 231 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 232 jj = 0; 233 for (i=0; i<m; i++) { 234 for (j=0; j<dlens[i]; j++) { 235 if (gmata->j[jj] < rstart) ld[i]++; 236 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 237 jj++; 238 } 239 } 240 /* send column indices to other processes */ 241 for (i=1; i<size; i++) { 242 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 243 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 244 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 } 246 247 /* send numerical values to other processes */ 248 for (i=1; i<size; i++) { 249 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 250 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 251 } 252 gmataa = gmata->a; 253 gmataj = gmata->j; 254 255 } else { 256 /* receive row lengths */ 257 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 258 /* receive column indices */ 259 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 260 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 261 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 262 /* determine number diagonal and off-diagonal counts */ 263 ierr = 
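/*
   Caller-side sketch of MatDistribute_MPIAIJ() (names are placeholders): seq is a square MATSEQAIJ
   matrix assembled on process 0 of comm, and mlocal is the number of rows this process is to own.
   A second call with MAT_REUSE_MATRIX keeps the distributed nonzero structure and only moves fresh
   numerical values over from process 0, as the reuse branch further below does.

     Mat dist;
     ierr = MatDistribute_MPIAIJ(comm,seq,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(comm,seq,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/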
PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 264 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 265 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 266 jj = 0; 267 for (i=0; i<m; i++) { 268 for (j=0; j<dlens[i]; j++) { 269 if (gmataj[jj] < rstart) ld[i]++; 270 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 271 jj++; 272 } 273 } 274 /* receive numerical values */ 275 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 276 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 277 } 278 /* set preallocation */ 279 for (i=0; i<m; i++) { 280 dlens[i] -= olens[i]; 281 } 282 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 283 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 284 285 for (i=0; i<m; i++) { 286 dlens[i] += olens[i]; 287 } 288 cnt = 0; 289 for (i=0; i<m; i++) { 290 row = rstart + i; 291 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 292 cnt += dlens[i]; 293 } 294 if (rank) { 295 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 296 } 297 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 298 ierr = PetscFree(rowners);CHKERRQ(ierr); 299 300 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 301 302 *inmat = mat; 303 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 304 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 305 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 306 mat = *inmat; 307 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 308 if (!rank) { 309 /* send numerical values to other processes */ 310 gmata = (Mat_SeqAIJ*) gmat->data; 311 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 312 gmataa = gmata->a; 313 for (i=1; i<size; i++) { 314 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 315 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 316 } 317 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 318 } else { 319 /* receive numerical values from process 0*/ 320 nz = Ad->nz + Ao->nz; 321 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 322 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 323 } 324 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 325 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 326 ad = Ad->a; 327 ao = Ao->a; 328 if (mat->rmap->n) { 329 i = 0; 330 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 331 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 332 } 333 for (i=1; i<mat->rmap->n; i++) { 334 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 335 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 336 } 337 i--; 338 if (mat->rmap->n) { 339 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 340 } 341 if (rank) { 342 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 343 } 344 } 345 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 346 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 /* 351 Local utility routine that creates a mapping from the global column 352 number to the 
local number in the off-diagonal part of the local 353 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 354 a slightly higher hash table cost; without it it is not scalable (each processor 355 has an order N integer array but is fast to acess. 356 */ 357 #undef __FUNCT__ 358 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 359 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 360 { 361 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 362 PetscErrorCode ierr; 363 PetscInt n = aij->B->cmap->n,i; 364 365 PetscFunctionBegin; 366 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 367 #if defined(PETSC_USE_CTABLE) 368 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 369 for (i=0; i<n; i++) { 370 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 371 } 372 #else 373 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 374 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 375 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 376 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 377 #endif 378 PetscFunctionReturn(0); 379 } 380 381 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 382 { \ 383 if (col <= lastcol1) low1 = 0; \ 384 else high1 = nrow1; \ 385 lastcol1 = col;\ 386 while (high1-low1 > 5) { \ 387 t = (low1+high1)/2; \ 388 if (rp1[t] > col) high1 = t; \ 389 else low1 = t; \ 390 } \ 391 for (_i=low1; _i<high1; _i++) { \ 392 if (rp1[_i] > col) break; \ 393 if (rp1[_i] == col) { \ 394 if (addv == ADD_VALUES) ap1[_i] += value; \ 395 else ap1[_i] = value; \ 396 goto a_noinsert; \ 397 } \ 398 } \ 399 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 400 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 401 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 402 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 403 N = nrow1++ - 1; a->nz++; high1++; \ 404 /* shift up all the later entries in this row */ \ 405 for (ii=N; ii>=_i; ii--) { \ 406 rp1[ii+1] = rp1[ii]; \ 407 ap1[ii+1] = ap1[ii]; \ 408 } \ 409 rp1[_i] = col; \ 410 ap1[_i] = value; \ 411 a_noinsert: ; \ 412 ailen[row] = nrow1; \ 413 } 414 415 416 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 417 { \ 418 if (col <= lastcol2) low2 = 0; \ 419 else high2 = nrow2; \ 420 lastcol2 = col; \ 421 while (high2-low2 > 5) { \ 422 t = (low2+high2)/2; \ 423 if (rp2[t] > col) high2 = t; \ 424 else low2 = t; \ 425 } \ 426 for (_i=low2; _i<high2; _i++) { \ 427 if (rp2[_i] > col) break; \ 428 if (rp2[_i] == col) { \ 429 if (addv == ADD_VALUES) ap2[_i] += value; \ 430 else ap2[_i] = value; \ 431 goto b_noinsert; \ 432 } \ 433 } \ 434 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 435 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 436 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 437 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 438 N = nrow2++ - 1; b->nz++; high2++; \ 439 /* shift up all the later entries in this row */ \ 440 for (ii=N; ii>=_i; ii--) { \ 441 rp2[ii+1] = rp2[ii]; \ 442 ap2[ii+1] = ap2[ii]; \ 443 } \ 444 rp2[_i] = col; \ 445 ap2[_i] = value; \ 446 
b_noinsert: ; \ 447 bilen[row] = nrow2; \ 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 452 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 453 { 454 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 455 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 456 PetscErrorCode ierr; 457 PetscInt l,*garray = mat->garray,diag; 458 459 PetscFunctionBegin; 460 /* code only works for square matrices A */ 461 462 /* find size of row to the left of the diagonal part */ 463 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 464 row = row - diag; 465 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 466 if (garray[b->j[b->i[row]+l]] > diag) break; 467 } 468 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 469 470 /* diagonal part */ 471 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 472 473 /* right of diagonal part */ 474 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "MatSetValues_MPIAIJ" 480 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 481 { 482 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 483 PetscScalar value; 484 PetscErrorCode ierr; 485 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 486 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 487 PetscBool roworiented = aij->roworiented; 488 489 /* Some Variables required in the macro */ 490 Mat A = aij->A; 491 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 492 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 493 MatScalar *aa = a->a; 494 PetscBool ignorezeroentries = a->ignorezeroentries; 495 Mat B = aij->B; 496 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 497 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 498 MatScalar *ba = b->a; 499 500 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 501 PetscInt nonew; 502 MatScalar *ap1,*ap2; 503 504 PetscFunctionBegin; 505 if (v) PetscValidScalarPointer(v,6); 506 for (i=0; i<m; i++) { 507 if (im[i] < 0) continue; 508 #if defined(PETSC_USE_DEBUG) 509 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 510 #endif 511 if (im[i] >= rstart && im[i] < rend) { 512 row = im[i] - rstart; 513 lastcol1 = -1; 514 rp1 = aj + ai[row]; 515 ap1 = aa + ai[row]; 516 rmax1 = aimax[row]; 517 nrow1 = ailen[row]; 518 low1 = 0; 519 high1 = nrow1; 520 lastcol2 = -1; 521 rp2 = bj + bi[row]; 522 ap2 = ba + bi[row]; 523 rmax2 = bimax[row]; 524 nrow2 = bilen[row]; 525 low2 = 0; 526 high2 = nrow2; 527 528 for (j=0; j<n; j++) { 529 if (v) { 530 if (roworiented) value = v[i*n+j]; 531 else value = v[i+j*m]; 532 } else value = 0.0; 533 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 534 if (in[j] >= cstart && in[j] < cend) { 535 col = in[j] - cstart; 536 nonew = a->nonew; 537 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 538 } else if (in[j] < 0) continue; 539 #if defined(PETSC_USE_DEBUG) 540 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 541 #endif 542 else { 543 if (mat->was_assembled) { 544 if (!aij->colmap) { 
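/*
   The column map created just below translates a global column number into the local column index of
   the off-diagonal block B.  Sketch of the lookup used in the lines that follow (gcol is a global
   column number; col becomes the local index, or negative if the column is not yet present):

     with PETSC_USE_CTABLE:     PetscTableFind(aij->colmap,gcol+1,&col); col--;
     without PETSC_USE_CTABLE:  col = aij->colmap[gcol] - 1;
*/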
545 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 546 } 547 #if defined(PETSC_USE_CTABLE) 548 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 549 col--; 550 #else 551 col = aij->colmap[in[j]] - 1; 552 #endif 553 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 554 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 555 col = in[j]; 556 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 557 B = aij->B; 558 b = (Mat_SeqAIJ*)B->data; 559 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 560 rp2 = bj + bi[row]; 561 ap2 = ba + bi[row]; 562 rmax2 = bimax[row]; 563 nrow2 = bilen[row]; 564 low2 = 0; 565 high2 = nrow2; 566 bm = aij->B->rmap->n; 567 ba = b->a; 568 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 569 } else col = in[j]; 570 nonew = b->nonew; 571 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 572 } 573 } 574 } else { 575 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 576 if (!aij->donotstash) { 577 mat->assembled = PETSC_FALSE; 578 if (roworiented) { 579 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 580 } else { 581 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 582 } 583 } 584 } 585 } 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatGetValues_MPIAIJ" 591 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 592 { 593 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 594 PetscErrorCode ierr; 595 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 596 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 597 598 PetscFunctionBegin; 599 for (i=0; i<m; i++) { 600 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 601 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 602 if (idxm[i] >= rstart && idxm[i] < rend) { 603 row = idxm[i] - rstart; 604 for (j=0; j<n; j++) { 605 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 606 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 607 if (idxn[j] >= cstart && idxn[j] < cend) { 608 col = idxn[j] - cstart; 609 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 610 } else { 611 if (!aij->colmap) { 612 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 613 } 614 #if defined(PETSC_USE_CTABLE) 615 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 616 col--; 617 #else 618 col = aij->colmap[idxn[j]] - 1; 619 #endif 620 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 621 else { 622 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 623 } 624 } 625 } 626 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 627 } 628 PetscFunctionReturn(0); 629 } 630 631 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 632 633 #undef __FUNCT__ 634 #define __FUNCT__ 
"MatAssemblyBegin_MPIAIJ" 635 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 636 { 637 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 638 PetscErrorCode ierr; 639 PetscInt nstash,reallocs; 640 InsertMode addv; 641 642 PetscFunctionBegin; 643 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 644 645 /* make sure all processors are either in INSERTMODE or ADDMODE */ 646 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 647 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 648 mat->insertmode = addv; /* in case this processor had no cache */ 649 650 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 651 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 652 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 653 PetscFunctionReturn(0); 654 } 655 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 658 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 659 { 660 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 661 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 662 PetscErrorCode ierr; 663 PetscMPIInt n; 664 PetscInt i,j,rstart,ncols,flg; 665 PetscInt *row,*col; 666 PetscBool other_disassembled; 667 PetscScalar *val; 668 InsertMode addv = mat->insertmode; 669 670 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 671 672 PetscFunctionBegin; 673 if (!aij->donotstash && !mat->nooffprocentries) { 674 while (1) { 675 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 676 if (!flg) break; 677 678 for (i=0; i<n; ) { 679 /* Now identify the consecutive vals belonging to the same row */ 680 for (j=i,rstart=row[j]; j<n; j++) { 681 if (row[j] != rstart) break; 682 } 683 if (j < n) ncols = j-i; 684 else ncols = n-i; 685 /* Now assemble all these values with a single function call */ 686 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 687 688 i = j; 689 } 690 } 691 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 692 } 693 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 694 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 695 696 /* determine if any processor has disassembled, if so we must 697 also disassemble ourselfs, in order that we may reassemble. 
*/ 698 /* 699 if nonzero structure of submatrix B cannot change then we know that 700 no processor disassembled thus we can skip this stuff 701 */ 702 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 703 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 704 if (mat->was_assembled && !other_disassembled) { 705 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 706 } 707 } 708 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 709 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 710 } 711 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 712 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 713 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 714 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 715 716 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 717 718 aij->rowvalues = 0; 719 720 /* used by MatAXPY() */ 721 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 722 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 723 724 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 725 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 731 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 732 { 733 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 734 PetscErrorCode ierr; 735 736 PetscFunctionBegin; 737 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 738 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatZeroRows_MPIAIJ" 744 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 745 { 746 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 747 PetscErrorCode ierr; 748 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 749 PetscInt i,*owners = A->rmap->range; 750 PetscInt *nprocs,j,idx,nsends,row; 751 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 752 PetscInt *rvalues,count,base,slen,*source; 753 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 754 MPI_Comm comm; 755 MPI_Request *send_waits,*recv_waits; 756 MPI_Status recv_status,*send_status; 757 const PetscScalar *xx; 758 PetscScalar *bb; 759 #if defined(PETSC_DEBUG) 760 PetscBool found = PETSC_FALSE; 761 #endif 762 763 PetscFunctionBegin; 764 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 765 /* first count number of contributors to each processor */ 766 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 767 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 768 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 769 j = 0; 770 for (i=0; i<N; i++) { 771 if (lastidx > (idx = rows[i])) j = 0; 772 lastidx = idx; 773 for (; j<size; j++) { 774 if (idx >= owners[j] && idx < owners[j+1]) { 775 nprocs[2*j]++; 776 nprocs[2*j+1] = 1; 777 owner[i] = j; 778 #if defined(PETSC_DEBUG) 779 found = PETSC_TRUE; 780 #endif 781 break; 782 } 783 } 784 #if defined(PETSC_DEBUG) 785 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 786 found = PETSC_FALSE; 787 #endif 788 } 789 nsends = 0; 790 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 791 792 if (A->nooffproczerorows) { 793 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off 
process zero row"); 794 nrecvs = nsends; 795 nmax = N; 796 } else { 797 /* inform other processors of number of messages and max length*/ 798 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 799 } 800 801 /* post receives: */ 802 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 803 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 804 for (i=0; i<nrecvs; i++) { 805 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 806 } 807 808 /* do sends: 809 1) starts[i] gives the starting index in svalues for stuff going to 810 the ith processor 811 */ 812 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 813 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 814 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 815 816 starts[0] = 0; 817 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 818 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 819 820 starts[0] = 0; 821 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 822 count = 0; 823 for (i=0; i<size; i++) { 824 if (nprocs[2*i+1]) { 825 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 826 } 827 } 828 ierr = PetscFree(starts);CHKERRQ(ierr); 829 830 base = owners[rank]; 831 832 /* wait on receives */ 833 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 834 count = nrecvs; slen = 0; 835 while (count) { 836 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 837 /* unpack receives into our local space */ 838 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 839 840 source[imdex] = recv_status.MPI_SOURCE; 841 lens[imdex] = n; 842 slen += n; 843 count--; 844 } 845 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 846 847 /* move the data into the send scatter */ 848 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 849 count = 0; 850 for (i=0; i<nrecvs; i++) { 851 values = rvalues + i*nmax; 852 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 853 } 854 ierr = PetscFree(rvalues);CHKERRQ(ierr); 855 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 856 ierr = PetscFree(owner);CHKERRQ(ierr); 857 ierr = PetscFree(nprocs);CHKERRQ(ierr); 858 859 /* fix right hand side if needed */ 860 if (x && b) { 861 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 862 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 863 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 864 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 865 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 866 } 867 /* 868 Zero the required rows. If the "diagonal block" of the matrix 869 is square and the user wishes to set the diagonal we use separate 870 code so that MatSetValues() is not called for each diagonal allocating 871 new memory, thus calling lots of mallocs and slowing things down. 
872 873 */ 874 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 875 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 876 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 877 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 878 } else if (diag != 0.0) { 879 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 880 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 881 for (i = 0; i < slen; i++) { 882 row = lrows[i] + rstart; 883 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 884 } 885 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 886 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 887 } else { 888 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 889 } 890 ierr = PetscFree(lrows);CHKERRQ(ierr); 891 892 /* wait on sends */ 893 if (nsends) { 894 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 895 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 896 ierr = PetscFree(send_status);CHKERRQ(ierr); 897 } 898 ierr = PetscFree(send_waits);CHKERRQ(ierr); 899 ierr = PetscFree(svalues);CHKERRQ(ierr); 900 PetscFunctionReturn(0); 901 } 902 903 #undef __FUNCT__ 904 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 905 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 906 { 907 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 908 PetscErrorCode ierr; 909 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 910 PetscInt i,*owners = A->rmap->range; 911 PetscInt *nprocs,j,idx,nsends; 912 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 913 PetscInt *rvalues,count,base,slen,*source; 914 PetscInt *lens,*lrows,*values,m; 915 MPI_Comm comm; 916 MPI_Request *send_waits,*recv_waits; 917 MPI_Status recv_status,*send_status; 918 const PetscScalar *xx; 919 PetscScalar *bb,*mask; 920 Vec xmask,lmask; 921 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 922 const PetscInt *aj, *ii,*ridx; 923 PetscScalar *aa; 924 #if defined(PETSC_DEBUG) 925 PetscBool found = PETSC_FALSE; 926 #endif 927 928 PetscFunctionBegin; 929 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 930 /* first count number of contributors to each processor */ 931 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 932 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 933 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 934 j = 0; 935 for (i=0; i<N; i++) { 936 if (lastidx > (idx = rows[i])) j = 0; 937 lastidx = idx; 938 for (; j<size; j++) { 939 if (idx >= owners[j] && idx < owners[j+1]) { 940 nprocs[2*j]++; 941 nprocs[2*j+1] = 1; 942 owner[i] = j; 943 #if defined(PETSC_DEBUG) 944 found = PETSC_TRUE; 945 #endif 946 break; 947 } 948 } 949 #if defined(PETSC_DEBUG) 950 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 951 found = PETSC_FALSE; 952 #endif 953 } 954 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 955 956 /* inform other processors of number of messages and max length*/ 957 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 958 959 /* post receives: */ 960 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 961 ierr = 
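/*
   MatZeroRows_MPIAIJ() above and MatZeroRowsColumns_MPIAIJ() here use the same rendezvous pattern to
   route each requested global row to its owning process.  A caller-side sketch (the row numbers and
   the diagonal value 1.0 are placeholders); passing x and b makes the routine also adjust the
   right-hand side for the eliminated unknowns:

     PetscInt rows[2] = {0,5};          global row numbers; any process may list any row
     ierr = MatZeroRowsColumns(A,2,rows,1.0,x,b);CHKERRQ(ierr);

   MatZeroRows() takes the same arguments and only zeros the rows.
*/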
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 962 for (i=0; i<nrecvs; i++) { 963 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 964 } 965 966 /* do sends: 967 1) starts[i] gives the starting index in svalues for stuff going to 968 the ith processor 969 */ 970 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 971 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 972 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 973 974 starts[0] = 0; 975 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 976 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 977 978 starts[0] = 0; 979 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 980 count = 0; 981 for (i=0; i<size; i++) { 982 if (nprocs[2*i+1]) { 983 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 984 } 985 } 986 ierr = PetscFree(starts);CHKERRQ(ierr); 987 988 base = owners[rank]; 989 990 /* wait on receives */ 991 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 992 count = nrecvs; slen = 0; 993 while (count) { 994 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 995 /* unpack receives into our local space */ 996 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 997 998 source[imdex] = recv_status.MPI_SOURCE; 999 lens[imdex] = n; 1000 slen += n; 1001 count--; 1002 } 1003 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1004 1005 /* move the data into the send scatter */ 1006 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1007 count = 0; 1008 for (i=0; i<nrecvs; i++) { 1009 values = rvalues + i*nmax; 1010 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1011 } 1012 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1013 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1014 ierr = PetscFree(owner);CHKERRQ(ierr); 1015 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1016 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1017 1018 /* zero diagonal part of matrix */ 1019 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1020 1021 /* handle off diagonal part of matrix */ 1022 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1023 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1024 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1025 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1026 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1027 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1028 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1030 if (x) { 1031 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1032 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1034 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1035 } 1036 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1037 1038 /* remove zeroed rows of off diagonal matrix */ 1039 ii = aij->i; 1040 for (i=0; i<slen; i++) { 1041 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1042 } 1043 1044 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1045 if (aij->compressedrow.use) { 1046 m = aij->compressedrow.nrows; 1047 ii = aij->compressedrow.i; 
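/*
   When aij->compressedrow.use is set, B keeps row pointers only for rows that actually contain
   nonzeros: compressedrow.i holds nrows+1 offsets into aij->j/aij->a and compressedrow.rindex[k] is
   the local row number of compressed row k.  Schematically, the loop below visits

     for (k=0; k<compressedrow.nrows; k++)
        entries aij->j[ii[k]] .. aij->j[ii[k+1]-1]   belong to local row rindex[k]
*/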
1048 ridx = aij->compressedrow.rindex; 1049 for (i=0; i<m; i++) { 1050 n = ii[i+1] - ii[i]; 1051 aj = aij->j + ii[i]; 1052 aa = aij->a + ii[i]; 1053 1054 for (j=0; j<n; j++) { 1055 if (PetscAbsScalar(mask[*aj])) { 1056 if (b) bb[*ridx] -= *aa*xx[*aj]; 1057 *aa = 0.0; 1058 } 1059 aa++; 1060 aj++; 1061 } 1062 ridx++; 1063 } 1064 } else { /* do not use compressed row format */ 1065 m = l->B->rmap->n; 1066 for (i=0; i<m; i++) { 1067 n = ii[i+1] - ii[i]; 1068 aj = aij->j + ii[i]; 1069 aa = aij->a + ii[i]; 1070 for (j=0; j<n; j++) { 1071 if (PetscAbsScalar(mask[*aj])) { 1072 if (b) bb[i] -= *aa*xx[*aj]; 1073 *aa = 0.0; 1074 } 1075 aa++; 1076 aj++; 1077 } 1078 } 1079 } 1080 if (x) { 1081 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1082 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1083 } 1084 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1085 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1086 ierr = PetscFree(lrows);CHKERRQ(ierr); 1087 1088 /* wait on sends */ 1089 if (nsends) { 1090 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1091 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1092 ierr = PetscFree(send_status);CHKERRQ(ierr); 1093 } 1094 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1095 ierr = PetscFree(svalues);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatMult_MPIAIJ" 1101 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1102 { 1103 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1104 PetscErrorCode ierr; 1105 PetscInt nt; 1106 1107 PetscFunctionBegin; 1108 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1109 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1110 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1111 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1112 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1113 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1119 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1120 { 1121 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1122 PetscErrorCode ierr; 1123 1124 PetscFunctionBegin; 1125 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1126 PetscFunctionReturn(0); 1127 } 1128 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1131 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1132 { 1133 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1134 PetscErrorCode ierr; 1135 1136 PetscFunctionBegin; 1137 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1138 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1139 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNCT__ 1145 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1146 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1147 { 1148 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1149 PetscErrorCode ierr; 1150 PetscBool merged; 1151 1152 PetscFunctionBegin; 1153 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1154 /* do nondiagonal part */ 1155 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1156 if (!merged) { 1157 /* send it on its way */ 1158 
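/*
   For reference, the parallel products split over the diagonal block A and the off-diagonal block B as

     y_local = A*x_local + B*x_ghost         (MatMult_MPIAIJ above; x_ghost gathered through Mvctx)

   and for the transpose

     A^T*x_local   contributes to locally owned entries of y
     B^T*x_local   contributes to entries owned by other processes; it is held in a->lvec and the
                   reverse scatter (SCATTER_REVERSE, ADD_VALUES) below adds it into those entries.
*/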
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where
the subblock A->A is the 1238 diagonal block 1239 */ 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1242 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1243 { 1244 PetscErrorCode ierr; 1245 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1246 1247 PetscFunctionBegin; 1248 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1249 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1250 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatScale_MPIAIJ" 1256 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1257 { 1258 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1259 PetscErrorCode ierr; 1260 1261 PetscFunctionBegin; 1262 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1263 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatDestroy_MPIAIJ" 1269 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1270 { 1271 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1272 PetscErrorCode ierr; 1273 1274 PetscFunctionBegin; 1275 #if defined(PETSC_USE_LOG) 1276 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1277 #endif 1278 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1279 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1280 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1281 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1282 #if defined(PETSC_USE_CTABLE) 1283 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1284 #else 1285 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1286 #endif 1287 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1288 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1289 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1290 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1291 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1292 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1293 1294 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",NULL);CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1308 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1309 { 1310 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1311 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1312 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1313 PetscErrorCode ierr; 1314 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1315 int fd; 1316 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1317 PetscInt nzmax,*column_indices,j,k,col,*garray 
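/*
   Layout of the binary file written by MatView_MPIAIJ_Binary() below; everything is written by
   process 0 and the counts are global:

     PetscInt    header[4] = {MAT_FILE_CLASSID, M, N, nnz}
     PetscInt    row_lengths[M]
     PetscInt    column_indices[nnz]      global column numbers, row by row
     PetscScalar values[nnz]              in the same order as the column indices
*/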
= aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1318 PetscScalar *column_values; 1319 PetscInt message_count,flowcontrolcount; 1320 FILE *file; 1321 1322 PetscFunctionBegin; 1323 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1324 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1325 nz = A->nz + B->nz; 1326 if (!rank) { 1327 header[0] = MAT_FILE_CLASSID; 1328 header[1] = mat->rmap->N; 1329 header[2] = mat->cmap->N; 1330 1331 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1332 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1333 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1334 /* get largest number of rows any processor has */ 1335 rlen = mat->rmap->n; 1336 range = mat->rmap->range; 1337 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1338 } else { 1339 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1340 rlen = mat->rmap->n; 1341 } 1342 1343 /* load up the local row counts */ 1344 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1345 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1346 1347 /* store the row lengths to the file */ 1348 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1349 if (!rank) { 1350 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1351 for (i=1; i<size; i++) { 1352 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1353 rlen = range[i+1] - range[i]; 1354 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1355 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1356 } 1357 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1358 } else { 1359 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1360 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1361 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1362 } 1363 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1364 1365 /* load up the local column indices */ 1366 nzmax = nz; /* th processor needs space a largest processor needs */ 1367 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1368 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1369 cnt = 0; 1370 for (i=0; i<mat->rmap->n; i++) { 1371 for (j=B->i[i]; j<B->i[i+1]; j++) { 1372 if ((col = garray[B->j[j]]) > cstart) break; 1373 column_indices[cnt++] = col; 1374 } 1375 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1376 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1377 } 1378 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1379 1380 /* store the column indices to the file */ 1381 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1382 if (!rank) { 1383 MPI_Status status; 1384 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1385 for (i=1; i<size; i++) { 1386 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 
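/*
   Schematic of the flow-control handshake used for each of the three passes
   (row lengths, column indices, numerical values):

     process 0                                        process i > 0
       PetscViewerFlowControlStart()                    PetscViewerFlowControlStart()
       loop i = 1 .. size-1                             PetscViewerFlowControlStepWorker()
         PetscViewerFlowControlStepMaster()             send local data to process 0
         receive process i's data, PetscBinaryWrite()   PetscViewerFlowControlEndWorker()
       PetscViewerFlowControlEndMaster()

   so that only a limited number of messages (controlled by flowcontrolcount) are outstanding at once.
*/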
1387 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1388 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1389 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1390 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1391 } 1392 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1393 } else { 1394 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1395 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1396 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1397 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1398 } 1399 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1400 1401 /* load up the local column values */ 1402 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1403 cnt = 0; 1404 for (i=0; i<mat->rmap->n; i++) { 1405 for (j=B->i[i]; j<B->i[i+1]; j++) { 1406 if (garray[B->j[j]] > cstart) break; 1407 column_values[cnt++] = B->a[j]; 1408 } 1409 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1410 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1411 } 1412 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1413 1414 /* store the column values to the file */ 1415 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1416 if (!rank) { 1417 MPI_Status status; 1418 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1419 for (i=1; i<size; i++) { 1420 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1421 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1422 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1423 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1424 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1425 } 1426 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1427 } else { 1428 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1429 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1430 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1431 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1432 } 1433 ierr = PetscFree(column_values);CHKERRQ(ierr); 1434 1435 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1436 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #include <petscdraw.h> 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1443 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1444 { 1445 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1446 PetscErrorCode ierr; 1447 PetscMPIInt rank = aij->rank,size = aij->size; 1448 PetscBool isdraw,iascii,isbinary; 1449 PetscViewer sviewer; 1450 PetscViewerFormat format; 1451 1452 PetscFunctionBegin; 1453 ierr = 
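/*
   Typical caller-side ways of reaching MatView_MPIAIJ_ASCIIorDraworSocket() below and, through its
   binary branch, MatView_MPIAIJ_Binary() above (the file name is a placeholder):

     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);                              ASCII

     PetscViewer v;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&v);CHKERRQ(ierr);
     ierr = MatView(A,v);CHKERRQ(ierr);                                                      binary
     ierr = PetscViewerDestroy(&v);CHKERRQ(ierr);
*/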
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1456 if (iascii) { 1457 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1458 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1459 MatInfo info; 1460 PetscBool inodes; 1461 1462 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1463 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1464 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1465 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1466 if (!inodes) { 1467 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1468 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1469 } else { 1470 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1471 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1472 } 1473 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1474 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1475 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1476 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1477 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1478 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1480 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1481 PetscFunctionReturn(0); 1482 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1483 PetscInt inodecount,inodelimit,*inodes; 1484 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1485 if (inodes) { 1486 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1487 } else { 1488 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1489 } 1490 PetscFunctionReturn(0); 1491 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1492 PetscFunctionReturn(0); 1493 } 1494 } else if (isbinary) { 1495 if (size == 1) { 1496 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1497 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1498 } else { 1499 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1500 } 1501 PetscFunctionReturn(0); 1502 } else if (isdraw) { 1503 PetscDraw draw; 1504 PetscBool isnull; 1505 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1506 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1507 } 1508 1509 if (size == 1) { 1510 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1511 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1512 } else { 1513 /* assemble the entire matrix onto first processor. 
*/ 1514 Mat A; 1515 Mat_SeqAIJ *Aloc; 1516 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1517 MatScalar *a; 1518 1519 if (mat->rmap->N > 1024) { 1520 PetscBool flg = PETSC_FALSE; 1521 1522 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1523 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1524 } 1525 1526 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1527 if (!rank) { 1528 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1529 } else { 1530 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1531 } 1532 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1533 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1534 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1535 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1536 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1537 1538 /* copy over the A part */ 1539 Aloc = (Mat_SeqAIJ*)aij->A->data; 1540 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1541 row = mat->rmap->rstart; 1542 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1543 for (i=0; i<m; i++) { 1544 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1545 row++; 1546 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1547 } 1548 aj = Aloc->j; 1549 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1550 1551 /* copy over the B part */ 1552 Aloc = (Mat_SeqAIJ*)aij->B->data; 1553 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1554 row = mat->rmap->rstart; 1555 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1556 ct = cols; 1557 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1558 for (i=0; i<m; i++) { 1559 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1560 row++; 1561 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1562 } 1563 ierr = PetscFree(ct);CHKERRQ(ierr); 1564 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1565 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 /* 1567 Everyone has to call to draw the matrix since the graphics waits are 1568 synchronized across all processors that share the PetscDraw object 1569 */ 1570 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1571 if (!rank) { 1572 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1573 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1574 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1575 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1576 } 1577 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1578 ierr = MatDestroy(&A);CHKERRQ(ierr); 1579 } 1580 PetscFunctionReturn(0); 1581 } 1582 1583 #undef __FUNCT__ 1584 #define __FUNCT__ "MatView_MPIAIJ" 1585 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1586 { 1587 PetscErrorCode ierr; 1588 PetscBool iascii,isdraw,issocket,isbinary; 1589 1590 PetscFunctionBegin; 1591 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1592 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1595 if (iascii || isdraw || isbinary || issocket) { 1596 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1597 } 1598 PetscFunctionReturn(0); 1599 } 1600 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatSOR_MPIAIJ" 1603 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1604 { 1605 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1606 PetscErrorCode ierr; 1607 Vec bb1 = 0; 1608 PetscBool hasop; 1609 1610 PetscFunctionBegin; 1611 if (flag == SOR_APPLY_UPPER) { 1612 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1613 PetscFunctionReturn(0); 1614 } 1615 1616 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1617 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1618 } 1619 1620 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1621 if (flag & SOR_ZERO_INITIAL_GUESS) { 1622 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1623 its--; 1624 } 1625 1626 while (its--) { 1627 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1628 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 1630 /* update rhs: bb1 = bb - B*x */ 1631 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1632 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1633 1634 /* local sweep */ 1635 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1636 } 1637 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1638 if (flag & SOR_ZERO_INITIAL_GUESS) { 1639 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1640 its--; 1641 } 1642 while (its--) { 1643 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1644 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 1646 /* update rhs: bb1 = bb - B*x */ 1647 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1648 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1649 1650 /* local sweep */ 1651 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1652 } 1653 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1654 if (flag & SOR_ZERO_INITIAL_GUESS) { 1655 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1656 its--; 1657 } 1658 while (its--) { 1659 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1660 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 1662 /* update rhs: bb1 = bb - B*x */ 1663 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1664 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1665 1666 /* local sweep */ 1667 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1668 } 1669 } else if (flag & SOR_EISENSTAT) { 1670 Vec xx1; 1671 1672 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1673 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1674 
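    /* Eisenstat trick: scatter the current iterate into the ghost vector, form
       bb1 = bb + ((omega-2)/omega)*D*x + B*x_ghost using the cached diagonal
       (or MatMultDiagonalBlock() when available), then perform a local forward
       sweep with bb1 as the right-hand side and add the correction xx1 to xx. */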
1675 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1676 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 if (!mat->diag) { 1678 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1679 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1680 } 1681 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1682 if (hasop) { 1683 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1684 } else { 1685 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1686 } 1687 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1688 1689 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1690 1691 /* local sweep */ 1692 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1693 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1694 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1695 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1696 1697 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1698 PetscFunctionReturn(0); 1699 } 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "MatPermute_MPIAIJ" 1703 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1704 { 1705 Mat aA,aB,Aperm; 1706 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1707 PetscScalar *aa,*ba; 1708 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1709 PetscSF rowsf,sf; 1710 IS parcolp = NULL; 1711 PetscBool done; 1712 PetscErrorCode ierr; 1713 1714 PetscFunctionBegin; 1715 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1716 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1717 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1718 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1719 1720 /* Invert row permutation to find out where my rows should go */ 1721 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1722 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1723 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1724 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1725 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1726 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1727 1728 /* Invert column permutation to find out where my columns should go */ 1729 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1730 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1731 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1732 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1733 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1734 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1736 1737 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1738 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1739 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1740 1741 /* Find out where my gcols should go */ 1742 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1743 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1744 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1745 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1746 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1747 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1748 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1750 1751 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1752 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1753 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1756 for (i=0; i<m; i++) { 1757 PetscInt row = rdest[i],rowner; 1758 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1759 for (j=ai[i]; j<ai[i+1]; j++) { 1760 PetscInt cowner,col = cdest[aj[j]]; 1761 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1762 if (rowner == cowner) dnnz[i]++; 1763 else onnz[i]++; 1764 } 1765 for (j=bi[i]; j<bi[i+1]; j++) { 1766 PetscInt cowner,col = gcdest[bj[j]]; 1767 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1768 if (rowner == cowner) dnnz[i]++; 1769 else onnz[i]++; 1770 } 1771 } 1772 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1773 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1775 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1779 1780 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1781 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1783 for (i=0; i<m; i++) { 1784 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1785 PetscInt rowlen; 1786 rowlen = ai[i+1] - ai[i]; 1787 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1788 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1789 rowlen = bi[i+1] - bi[i]; 1790 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1791 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1792 } 1793 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1794 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1797 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1799 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1800 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1801 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1802 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1803 *B = Aperm; 1804 PetscFunctionReturn(0); 1805 } 1806 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1809 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1810 { 1811 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1812 Mat A = mat->A,B = mat->B; 1813 PetscErrorCode ierr; 1814 PetscReal isend[5],irecv[5]; 1815 1816 PetscFunctionBegin; 1817 
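  /* Sum the local statistics of the diagonal (A) and off-diagonal (B) blocks, then
     combine across the communicator with a max or sum reduction according to the
     requested flag.  A minimal caller-side sketch (hypothetical variable names):

        MatInfo info;
        ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
        ierr = PetscPrintf(PETSC_COMM_WORLD,"total nonzeros used %g\n",info.nz_used);CHKERRQ(ierr);
  */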
info->block_size = 1.0; 1818 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1819 1820 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1821 isend[3] = info->memory; isend[4] = info->mallocs; 1822 1823 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1824 1825 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1826 isend[3] += info->memory; isend[4] += info->mallocs; 1827 if (flag == MAT_LOCAL) { 1828 info->nz_used = isend[0]; 1829 info->nz_allocated = isend[1]; 1830 info->nz_unneeded = isend[2]; 1831 info->memory = isend[3]; 1832 info->mallocs = isend[4]; 1833 } else if (flag == MAT_GLOBAL_MAX) { 1834 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1835 1836 info->nz_used = irecv[0]; 1837 info->nz_allocated = irecv[1]; 1838 info->nz_unneeded = irecv[2]; 1839 info->memory = irecv[3]; 1840 info->mallocs = irecv[4]; 1841 } else if (flag == MAT_GLOBAL_SUM) { 1842 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1843 1844 info->nz_used = irecv[0]; 1845 info->nz_allocated = irecv[1]; 1846 info->nz_unneeded = irecv[2]; 1847 info->memory = irecv[3]; 1848 info->mallocs = irecv[4]; 1849 } 1850 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1851 info->fill_ratio_needed = 0; 1852 info->factor_mallocs = 0; 1853 PetscFunctionReturn(0); 1854 } 1855 1856 #undef __FUNCT__ 1857 #define __FUNCT__ "MatSetOption_MPIAIJ" 1858 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1859 { 1860 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1861 PetscErrorCode ierr; 1862 1863 PetscFunctionBegin; 1864 switch (op) { 1865 case MAT_NEW_NONZERO_LOCATIONS: 1866 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1867 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1868 case MAT_KEEP_NONZERO_PATTERN: 1869 case MAT_NEW_NONZERO_LOCATION_ERR: 1870 case MAT_USE_INODES: 1871 case MAT_IGNORE_ZERO_ENTRIES: 1872 MatCheckPreallocated(A,1); 1873 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1874 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1875 break; 1876 case MAT_ROW_ORIENTED: 1877 a->roworiented = flg; 1878 1879 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1880 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1881 break; 1882 case MAT_NEW_DIAGONALS: 1883 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1884 break; 1885 case MAT_IGNORE_OFF_PROC_ENTRIES: 1886 a->donotstash = flg; 1887 break; 1888 case MAT_SPD: 1889 A->spd_set = PETSC_TRUE; 1890 A->spd = flg; 1891 if (flg) { 1892 A->symmetric = PETSC_TRUE; 1893 A->structurally_symmetric = PETSC_TRUE; 1894 A->symmetric_set = PETSC_TRUE; 1895 A->structurally_symmetric_set = PETSC_TRUE; 1896 } 1897 break; 1898 case MAT_SYMMETRIC: 1899 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1900 break; 1901 case MAT_STRUCTURALLY_SYMMETRIC: 1902 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1903 break; 1904 case MAT_HERMITIAN: 1905 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1906 break; 1907 case MAT_SYMMETRY_ETERNAL: 1908 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1909 break; 1910 default: 1911 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1912 } 1913 PetscFunctionReturn(0); 1914 } 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatGetRow_MPIAIJ" 1918 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1919 { 1920 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1921 PetscScalar 
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1922 PetscErrorCode ierr; 1923 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1924 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1925 PetscInt *cmap,*idx_p; 1926 1927 PetscFunctionBegin; 1928 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1929 mat->getrowactive = PETSC_TRUE; 1930 1931 if (!mat->rowvalues && (idx || v)) { 1932 /* 1933 allocate enough space to hold information from the longest row. 1934 */ 1935 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1936 PetscInt max = 1,tmp; 1937 for (i=0; i<matin->rmap->n; i++) { 1938 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1939 if (max < tmp) max = tmp; 1940 } 1941 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1942 } 1943 1944 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1945 lrow = row - rstart; 1946 1947 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1948 if (!v) {pvA = 0; pvB = 0;} 1949 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1950 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1951 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1952 nztot = nzA + nzB; 1953 1954 cmap = mat->garray; 1955 if (v || idx) { 1956 if (nztot) { 1957 /* Sort by increasing column numbers, assuming A and B already sorted */ 1958 PetscInt imark = -1; 1959 if (v) { 1960 *v = v_p = mat->rowvalues; 1961 for (i=0; i<nzB; i++) { 1962 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1963 else break; 1964 } 1965 imark = i; 1966 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1967 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1968 } 1969 if (idx) { 1970 *idx = idx_p = mat->rowindices; 1971 if (imark > -1) { 1972 for (i=0; i<imark; i++) { 1973 idx_p[i] = cmap[cworkB[i]]; 1974 } 1975 } else { 1976 for (i=0; i<nzB; i++) { 1977 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1978 else break; 1979 } 1980 imark = i; 1981 } 1982 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1983 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1984 } 1985 } else { 1986 if (idx) *idx = 0; 1987 if (v) *v = 0; 1988 } 1989 } 1990 *nz = nztot; 1991 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1992 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1993 PetscFunctionReturn(0); 1994 } 1995 1996 #undef __FUNCT__ 1997 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1998 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1999 { 2000 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2001 2002 PetscFunctionBegin; 2003 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2004 aij->getrowactive = PETSC_FALSE; 2005 PetscFunctionReturn(0); 2006 } 2007 2008 #undef __FUNCT__ 2009 #define __FUNCT__ "MatNorm_MPIAIJ" 2010 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2011 { 2012 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2013 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2014 PetscErrorCode ierr; 2015 PetscInt i,j,cstart = mat->cmap->rstart; 2016 PetscReal sum = 0.0; 2017 MatScalar *v; 2018 2019 PetscFunctionBegin; 2020 if (aij->size == 1) { 2021 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2022 } else { 2023 if (type == NORM_FROBENIUS) { 2024 v = 
amat->a; 2025 for (i=0; i<amat->nz; i++) { 2026 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2027 } 2028 v = bmat->a; 2029 for (i=0; i<bmat->nz; i++) { 2030 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2031 } 2032 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2033 *norm = PetscSqrtReal(*norm); 2034 } else if (type == NORM_1) { /* max column norm */ 2035 PetscReal *tmp,*tmp2; 2036 PetscInt *jj,*garray = aij->garray; 2037 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2039 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2040 *norm = 0.0; 2041 v = amat->a; jj = amat->j; 2042 for (j=0; j<amat->nz; j++) { 2043 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2044 } 2045 v = bmat->a; jj = bmat->j; 2046 for (j=0; j<bmat->nz; j++) { 2047 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2048 } 2049 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2050 for (j=0; j<mat->cmap->N; j++) { 2051 if (tmp2[j] > *norm) *norm = tmp2[j]; 2052 } 2053 ierr = PetscFree(tmp);CHKERRQ(ierr); 2054 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2055 } else if (type == NORM_INFINITY) { /* max row norm */ 2056 PetscReal ntemp = 0.0; 2057 for (j=0; j<aij->A->rmap->n; j++) { 2058 v = amat->a + amat->i[j]; 2059 sum = 0.0; 2060 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2061 sum += PetscAbsScalar(*v); v++; 2062 } 2063 v = bmat->a + bmat->i[j]; 2064 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2065 sum += PetscAbsScalar(*v); v++; 2066 } 2067 if (sum > ntemp) ntemp = sum; 2068 } 2069 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2070 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2071 } 2072 PetscFunctionReturn(0); 2073 } 2074 2075 #undef __FUNCT__ 2076 #define __FUNCT__ "MatTranspose_MPIAIJ" 2077 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2078 { 2079 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2080 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2081 PetscErrorCode ierr; 2082 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2083 PetscInt cstart = A->cmap->rstart,ncol; 2084 Mat B; 2085 MatScalar *array; 2086 2087 PetscFunctionBegin; 2088 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2089 2090 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2091 ai = Aloc->i; aj = Aloc->j; 2092 bi = Bloc->i; bj = Bloc->j; 2093 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2094 PetscInt *d_nnz,*g_nnz,*o_nnz; 2095 PetscSFNode *oloc; 2096 PETSC_UNUSED PetscSF sf; 2097 2098 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2099 /* compute d_nnz for preallocation */ 2100 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2101 for (i=0; i<ai[ma]; i++) { 2102 d_nnz[aj[i]]++; 2103 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2104 } 2105 /* compute local off-diagonal contributions */ 2106 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2107 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2108 /* map those to global */ 2109 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2110 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2111 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2112 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2113 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2114 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2116 2117 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2118 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2119 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2120 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2121 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2122 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2123 } else { 2124 B = *matout; 2125 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2126 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2127 } 2128 2129 /* copy over the A part */ 2130 array = Aloc->a; 2131 row = A->rmap->rstart; 2132 for (i=0; i<ma; i++) { 2133 ncol = ai[i+1]-ai[i]; 2134 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2135 row++; 2136 array += ncol; aj += ncol; 2137 } 2138 aj = Aloc->j; 2139 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2140 2141 /* copy over the B part */ 2142 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2143 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2144 array = Bloc->a; 2145 row = A->rmap->rstart; 2146 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2147 cols_tmp = cols; 2148 for (i=0; i<mb; i++) { 2149 ncol = bi[i+1]-bi[i]; 2150 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2151 row++; 2152 array += ncol; cols_tmp += ncol; 2153 } 2154 ierr = PetscFree(cols);CHKERRQ(ierr); 2155 2156 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2157 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2159 *matout = B; 2160 } else { 2161 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2162 } 2163 PetscFunctionReturn(0); 2164 } 2165 2166 #undef __FUNCT__ 2167 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2168 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2169 { 2170 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2171 Mat a = aij->A,b = aij->B; 2172 PetscErrorCode ierr; 2173 PetscInt s1,s2,s3; 2174 2175 PetscFunctionBegin; 2176 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2177 if (rr) { 2178 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2179 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2180 /* Overlap communication with computation. 
*/ 2181 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2182 } 2183 if (ll) { 2184 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2185 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2186 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2187 } 2188 /* scale the diagonal block */ 2189 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2190 2191 if (rr) { 2192 /* Do a scatter end and then right scale the off-diagonal block */ 2193 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2194 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2195 } 2196 PetscFunctionReturn(0); 2197 } 2198 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2201 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2202 { 2203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2204 PetscErrorCode ierr; 2205 2206 PetscFunctionBegin; 2207 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2208 PetscFunctionReturn(0); 2209 } 2210 2211 #undef __FUNCT__ 2212 #define __FUNCT__ "MatEqual_MPIAIJ" 2213 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2214 { 2215 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2216 Mat a,b,c,d; 2217 PetscBool flg; 2218 PetscErrorCode ierr; 2219 2220 PetscFunctionBegin; 2221 a = matA->A; b = matA->B; 2222 c = matB->A; d = matB->B; 2223 2224 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2225 if (flg) { 2226 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2227 } 2228 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2229 PetscFunctionReturn(0); 2230 } 2231 2232 #undef __FUNCT__ 2233 #define __FUNCT__ "MatCopy_MPIAIJ" 2234 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2235 { 2236 PetscErrorCode ierr; 2237 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2238 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2239 2240 PetscFunctionBegin; 2241 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2242 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2243 /* because of the column compression in the off-processor part of the matrix a->B, 2244 the number of columns in a->B and b->B may be different, hence we cannot call 2245 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2246 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2247 then copying the submatrices */ 2248 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2249 } else { 2250 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2251 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2252 } 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatSetUp_MPIAIJ" 2258 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2259 { 2260 PetscErrorCode ierr; 2261 2262 PetscFunctionBegin; 2263 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2264 PetscFunctionReturn(0); 2265 } 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2269 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2270 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2271 { 2272 PetscInt i,m=Y->rmap->N; 2273 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2274 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2275 const PetscInt *xi = x->i,*yi = y->i; 2276 2277 PetscFunctionBegin; 2278 /* Set the number of nonzeros in the new matrix */ 2279 for (i=0; i<m; i++) { 2280 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2281 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2282 nnz[i] = 0; 2283 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2284 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2285 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2286 nnz[i]++; 2287 } 2288 for (; k<nzy; k++) nnz[i]++; 2289 } 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "MatAXPY_MPIAIJ" 2295 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2296 { 2297 PetscErrorCode ierr; 2298 PetscInt i; 2299 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2300 PetscBLASInt bnz,one=1; 2301 Mat_SeqAIJ *x,*y; 2302 2303 PetscFunctionBegin; 2304 if (str == SAME_NONZERO_PATTERN) { 2305 PetscScalar alpha = a; 2306 x = (Mat_SeqAIJ*)xx->A->data; 2307 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2308 y = (Mat_SeqAIJ*)yy->A->data; 2309 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2310 x = (Mat_SeqAIJ*)xx->B->data; 2311 y = (Mat_SeqAIJ*)yy->B->data; 2312 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2313 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2314 } else if (str == SUBSET_NONZERO_PATTERN) { 2315 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2316 2317 x = (Mat_SeqAIJ*)xx->B->data; 2318 y = (Mat_SeqAIJ*)yy->B->data; 2319 if (y->xtoy && y->XtoY != xx->B) { 2320 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2321 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2322 } 2323 if (!y->xtoy) { /* get xtoy */ 2324 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2325 y->XtoY = xx->B; 2326 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2327 } 2328 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2329 } else { 2330 Mat B; 2331 PetscInt *nnz_d,*nnz_o; 2332 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2333 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2334 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2335 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2336 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2337 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2338 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2339 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2341 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2342 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2343 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2344 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2346 } 2347 PetscFunctionReturn(0); 2348 } 2349 2350 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2351 2352 #undef __FUNCT__ 2353 #define __FUNCT__ "MatConjugate_MPIAIJ" 2354 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2355 { 2356 #if defined(PETSC_USE_COMPLEX) 2357 PetscErrorCode ierr; 2358 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2359 2360 PetscFunctionBegin; 2361 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2362 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2363 #else 2364 PetscFunctionBegin; 2365 #endif 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatRealPart_MPIAIJ" 2371 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2372 { 2373 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2374 PetscErrorCode ierr; 2375 2376 PetscFunctionBegin; 2377 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2378 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2379 PetscFunctionReturn(0); 2380 } 2381 2382 #undef __FUNCT__ 2383 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2384 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2385 { 2386 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2387 PetscErrorCode ierr; 2388 2389 PetscFunctionBegin; 2390 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2391 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2392 PetscFunctionReturn(0); 2393 } 2394 2395 #if defined(PETSC_HAVE_PBGL) 2396 2397 #include <boost/parallel/mpi/bsp_process_group.hpp> 2398 #include <boost/graph/distributed/ilu_default_graph.hpp> 2399 #include <boost/graph/distributed/ilu_0_block.hpp> 2400 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2401 #include <boost/graph/distributed/petsc/interface.hpp> 2402 #include <boost/multi_array.hpp> 2403 #include <boost/parallel/distributed_property_map->hpp> 2404 2405 #undef __FUNCT__ 2406 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2407 /* 2408 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2409 */ 2410 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2411 { 2412 namespace petsc = boost::distributed::petsc; 2413 2414 namespace graph_dist = boost::graph::distributed; 2415 using boost::graph::distributed::ilu_default::process_group_type; 2416 using boost::graph::ilu_permuted; 2417 2418 PetscBool row_identity, col_identity; 2419 PetscContainer c; 2420 PetscInt m, n, M, N; 2421 PetscErrorCode ierr; 2422 2423 PetscFunctionBegin; 2424 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2425 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2426 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2427 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2428 2429 process_group_type pg; 2430 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2431 lgraph_type 
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2432 lgraph_type& level_graph = *lgraph_p; 2433 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2434 2435 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2436 ilu_permuted(level_graph); 2437 2438 /* put together the new matrix */ 2439 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2440 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2441 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2442 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2443 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2444 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2445 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2446 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 2448 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2449 ierr = PetscContainerSetPointer(c, lgraph_p); 2450 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2451 ierr = PetscContainerDestroy(&c); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 #undef __FUNCT__ 2456 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2457 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2458 { 2459 PetscFunctionBegin; 2460 PetscFunctionReturn(0); 2461 } 2462 2463 #undef __FUNCT__ 2464 #define __FUNCT__ "MatSolve_MPIAIJ" 2465 /* 2466 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2467 */ 2468 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2469 { 2470 namespace graph_dist = boost::graph::distributed; 2471 2472 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2473 lgraph_type *lgraph_p; 2474 PetscContainer c; 2475 PetscErrorCode ierr; 2476 2477 PetscFunctionBegin; 2478 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2479 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2480 ierr = VecCopy(b, x);CHKERRQ(ierr); 2481 2482 PetscScalar *array_x; 2483 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2484 PetscInt sx; 2485 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2486 2487 PetscScalar *array_b; 2488 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2489 PetscInt sb; 2490 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2491 2492 lgraph_type& level_graph = *lgraph_p; 2493 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2494 2495 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2496 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2497 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2498 2499 typedef boost::iterator_property_map<array_ref_type::iterator, 2500 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2501 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2502 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2503 2504 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2505 PetscFunctionReturn(0); 2506 } 2507 #endif 2508 2509 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2510 PetscInt nzlocal,nsends,nrecvs; 2511 PetscMPIInt *send_rank,*recv_rank; 2512 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2513 PetscScalar *sbuf_a,**rbuf_a; 2514 PetscErrorCode (*Destroy)(Mat); 2515 } Mat_Redundant; 2516 2517 #undef __FUNCT__ 2518 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2519 
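/*
   Frees the communication buffers cached in a Mat_Redundant context; invoked when
   the "Mat_Redundant" container attached to the redundant matrix is destroyed.
*/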
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2520 { 2521 PetscErrorCode ierr; 2522 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2523 PetscInt i; 2524 2525 PetscFunctionBegin; 2526 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2527 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2528 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2529 for (i=0; i<redund->nrecvs; i++) { 2530 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2531 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2532 } 2533 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2534 ierr = PetscFree(redund);CHKERRQ(ierr); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 #undef __FUNCT__ 2539 #define __FUNCT__ "MatDestroy_MatRedundant" 2540 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2541 { 2542 PetscErrorCode ierr; 2543 PetscContainer container; 2544 Mat_Redundant *redund=NULL; 2545 2546 PetscFunctionBegin; 2547 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2548 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2549 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2550 2551 A->ops->destroy = redund->Destroy; 2552 2553 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2554 if (A->ops->destroy) { 2555 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2556 } 2557 PetscFunctionReturn(0); 2558 } 2559 2560 #undef __FUNCT__ 2561 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2562 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2563 { 2564 PetscMPIInt rank,size; 2565 MPI_Comm comm; 2566 PetscErrorCode ierr; 2567 PetscInt nsends = 0,nrecvs=0,i,rownz_max=0; 2568 PetscMPIInt *send_rank= NULL,*recv_rank=NULL; 2569 PetscInt *rowrange = mat->rmap->range; 2570 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2571 Mat A = aij->A,B=aij->B,C=*matredundant; 2572 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2573 PetscScalar *sbuf_a; 2574 PetscInt nzlocal=a->nz+b->nz; 2575 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2576 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2577 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2578 MatScalar *aworkA,*aworkB; 2579 PetscScalar *vals; 2580 PetscMPIInt tag1,tag2,tag3,imdex; 2581 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2582 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2583 MPI_Status recv_status,*send_status; 2584 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2585 PetscInt **rbuf_j=NULL; 2586 PetscScalar **rbuf_a=NULL; 2587 Mat_Redundant *redund =NULL; 2588 PetscContainer container; 2589 2590 PetscFunctionBegin; 2591 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2592 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2593 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2594 2595 if (reuse == MAT_REUSE_MATRIX) { 2596 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2597 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2598 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2599 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2600 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2601 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2602 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2603 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2604 2605 nsends = redund->nsends; 2606 nrecvs = redund->nrecvs; 2607 send_rank = redund->send_rank; 2608 recv_rank = redund->recv_rank; 2609 sbuf_nz = redund->sbuf_nz; 2610 rbuf_nz = redund->rbuf_nz; 2611 sbuf_j = redund->sbuf_j; 2612 sbuf_a = redund->sbuf_a; 2613 rbuf_j = redund->rbuf_j; 2614 rbuf_a = redund->rbuf_a; 2615 } 2616 2617 if (reuse == MAT_INITIAL_MATRIX) { 2618 PetscMPIInt subrank,subsize; 2619 PetscInt nleftover,np_subcomm; 2620 /* get the destination processors' id send_rank, nsends and nrecvs */ 2621 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2622 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2623 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2624 2625 np_subcomm = size/nsubcomm; 2626 nleftover = size - nsubcomm*np_subcomm; 2627 2628 nsends = 0; nrecvs = 0; 2629 for (i=0; i<size; i++) { /* i=rank*/ 2630 if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */ 2631 send_rank[nsends] = i; nsends++; 2632 recv_rank[nrecvs++] = i; 2633 } 2634 } 2635 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2636 i = size-nleftover-1; 2637 j = 0; 2638 while (j < nsubcomm - nleftover) { 2639 send_rank[nsends++] = i; 2640 i--; j++; 2641 } 2642 } 2643 2644 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2645 for (i=0; i<nleftover; i++) { 2646 recv_rank[nrecvs++] = size-nleftover+i; 2647 } 2648 } 2649 2650 /* allocate sbuf_j, sbuf_a */ 2651 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2652 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2653 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2654 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2655 2656 /* copy mat's local entries into the buffers */ 2657 if (reuse == MAT_INITIAL_MATRIX) { 2658 rownz_max = 0; 2659 rptr = sbuf_j; 2660 cols = sbuf_j + rend-rstart + 1; 2661 vals = sbuf_a; 2662 rptr[0] = 0; 2663 for (i=0; i<rend-rstart; i++) { 2664 row = i + rstart; 2665 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2666 ncols = nzA + nzB; 2667 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2668 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2669 /* load the column indices for this row into cols */ 2670 lwrite = 0; 2671 for (l=0; l<nzB; l++) { 2672 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2673 vals[lwrite] = aworkB[l]; 2674 cols[lwrite++] = ctmp; 2675 } 2676 } 2677 for (l=0; l<nzA; l++) { 2678 vals[lwrite] = aworkA[l]; 2679 cols[lwrite++] = cstart + cworkA[l]; 2680 } 2681 for (l=0; l<nzB; l++) { 2682 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2683 vals[lwrite] = aworkB[l]; 2684 cols[lwrite++] = ctmp; 2685 } 2686 } 2687 vals += ncols; 2688 cols += ncols; 2689 rptr[i+1] = rptr[i] + ncols; 2690 if (rownz_max < ncols) rownz_max = ncols; 2691 } 2692 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2693 } else { /* only copy matrix values into sbuf_a */ 2694 rptr = sbuf_j; 2695 vals = sbuf_a; 2696 rptr[0] = 0; 2697 for (i=0; 
i<rend-rstart; i++) { 2698 row = i + rstart; 2699 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2700 ncols = nzA + nzB; 2701 cworkB = b->j + b->i[i]; 2702 aworkA = a->a + a->i[i]; 2703 aworkB = b->a + b->i[i]; 2704 lwrite = 0; 2705 for (l=0; l<nzB; l++) { 2706 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2707 } 2708 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2709 for (l=0; l<nzB; l++) { 2710 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2711 } 2712 vals += ncols; 2713 rptr[i+1] = rptr[i] + ncols; 2714 } 2715 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2716 2717 /* send nzlocal to others, and recv other's nzlocal */ 2718 /*--------------------------------------------------*/ 2719 if (reuse == MAT_INITIAL_MATRIX) { 2720 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2721 2722 s_waits2 = s_waits3 + nsends; 2723 s_waits1 = s_waits2 + nsends; 2724 r_waits1 = s_waits1 + nsends; 2725 r_waits2 = r_waits1 + nrecvs; 2726 r_waits3 = r_waits2 + nrecvs; 2727 } else { 2728 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2729 2730 r_waits3 = s_waits3 + nsends; 2731 } 2732 2733 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2734 if (reuse == MAT_INITIAL_MATRIX) { 2735 /* get new tags to keep the communication clean */ 2736 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2737 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2738 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2739 2740 /* post receives of other's nzlocal */ 2741 for (i=0; i<nrecvs; i++) { 2742 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2743 } 2744 /* send nzlocal to others */ 2745 for (i=0; i<nsends; i++) { 2746 sbuf_nz[i] = nzlocal; 2747 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2748 } 2749 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2750 count = nrecvs; 2751 while (count) { 2752 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2753 2754 recv_rank[imdex] = recv_status.MPI_SOURCE; 2755 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2756 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2757 2758 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2759 2760 rbuf_nz[imdex] += i + 2; 2761 2762 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2763 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2764 count--; 2765 } 2766 /* wait on sends of nzlocal */ 2767 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2768 /* send mat->i,j to others, and recv from other's */ 2769 /*------------------------------------------------*/ 2770 for (i=0; i<nsends; i++) { 2771 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2772 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2773 } 2774 /* wait on receives of mat->i,j */ 2775 /*------------------------------*/ 2776 count = nrecvs; 2777 while (count) { 2778 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2779 if (recv_rank[imdex] != 
recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2780 count--; 2781 } 2782 /* wait on sends of mat->i,j */ 2783 /*---------------------------*/ 2784 if (nsends) { 2785 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2786 } 2787 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2788 2789 /* post receives, send and receive mat->a */ 2790 /*----------------------------------------*/ 2791 for (imdex=0; imdex<nrecvs; imdex++) { 2792 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2793 } 2794 for (i=0; i<nsends; i++) { 2795 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2796 } 2797 count = nrecvs; 2798 while (count) { 2799 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2800 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2801 count--; 2802 } 2803 if (nsends) { 2804 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2805 } 2806 2807 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2808 2809 /* create redundant matrix */ 2810 /*-------------------------*/ 2811 if (reuse == MAT_INITIAL_MATRIX) { 2812 /* compute rownz_max for preallocation */ 2813 for (imdex=0; imdex<nrecvs; imdex++) { 2814 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2815 rptr = rbuf_j[imdex]; 2816 for (i=0; i<j; i++) { 2817 ncols = rptr[i+1] - rptr[i]; 2818 if (rownz_max < ncols) rownz_max = ncols; 2819 } 2820 } 2821 2822 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2823 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2824 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2825 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2826 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2827 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2828 } else { 2829 C = *matredundant; 2830 } 2831 2832 /* insert local matrix entries */ 2833 rptr = sbuf_j; 2834 cols = sbuf_j + rend-rstart + 1; 2835 vals = sbuf_a; 2836 for (i=0; i<rend-rstart; i++) { 2837 row = i + rstart; 2838 ncols = rptr[i+1] - rptr[i]; 2839 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2840 vals += ncols; 2841 cols += ncols; 2842 } 2843 /* insert received matrix entries */ 2844 for (imdex=0; imdex<nrecvs; imdex++) { 2845 rstart = rowrange[recv_rank[imdex]]; 2846 rend = rowrange[recv_rank[imdex]+1]; 2847 rptr = rbuf_j[imdex]; 2848 cols = rbuf_j[imdex] + rend-rstart + 1; 2849 vals = rbuf_a[imdex]; 2850 for (i=0; i<rend-rstart; i++) { 2851 row = i + rstart; 2852 ncols = rptr[i+1] - rptr[i]; 2853 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2854 vals += ncols; 2855 cols += ncols; 2856 } 2857 } 2858 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2859 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2860 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2861 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2862 if (reuse == MAT_INITIAL_MATRIX) { 2863 PetscContainer container; 2864 *matredundant = C; 2865 /* create a supporting struct and attach it to C for reuse */ 2866 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2867 ierr = 
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2868 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2869 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2870 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2871 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2872 2873 redund->nzlocal = nzlocal; 2874 redund->nsends = nsends; 2875 redund->nrecvs = nrecvs; 2876 redund->send_rank = send_rank; 2877 redund->recv_rank = recv_rank; 2878 redund->sbuf_nz = sbuf_nz; 2879 redund->rbuf_nz = rbuf_nz; 2880 redund->sbuf_j = sbuf_j; 2881 redund->sbuf_a = sbuf_a; 2882 redund->rbuf_j = rbuf_j; 2883 redund->rbuf_a = rbuf_a; 2884 2885 redund->Destroy = C->ops->destroy; 2886 C->ops->destroy = MatDestroy_MatRedundant; 2887 } 2888 PetscFunctionReturn(0); 2889 } 2890 2891 #undef __FUNCT__ 2892 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2893 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2894 { 2895 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2896 PetscErrorCode ierr; 2897 PetscInt i,*idxb = 0; 2898 PetscScalar *va,*vb; 2899 Vec vtmp; 2900 2901 PetscFunctionBegin; 2902 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2903 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2904 if (idx) { 2905 for (i=0; i<A->rmap->n; i++) { 2906 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2907 } 2908 } 2909 2910 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2911 if (idx) { 2912 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2913 } 2914 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2915 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2916 2917 for (i=0; i<A->rmap->n; i++) { 2918 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2919 va[i] = vb[i]; 2920 if (idx) idx[i] = a->garray[idxb[i]]; 2921 } 2922 } 2923 2924 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2925 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2926 ierr = PetscFree(idxb);CHKERRQ(ierr); 2927 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2928 PetscFunctionReturn(0); 2929 } 2930 2931 #undef __FUNCT__ 2932 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2933 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2934 { 2935 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2936 PetscErrorCode ierr; 2937 PetscInt i,*idxb = 0; 2938 PetscScalar *va,*vb; 2939 Vec vtmp; 2940 2941 PetscFunctionBegin; 2942 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2943 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2944 if (idx) { 2945 for (i=0; i<A->cmap->n; i++) { 2946 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2947 } 2948 } 2949 2950 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2951 if (idx) { 2952 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2953 } 2954 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2955 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2956 2957 for (i=0; i<A->rmap->n; i++) { 2958 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2959 va[i] = vb[i]; 2960 if (idx) idx[i] = a->garray[idxb[i]]; 2961 } 2962 } 2963 2964 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2965 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2966 ierr = PetscFree(idxb);CHKERRQ(ierr); 2967 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2968 PetscFunctionReturn(0); 2969 } 2970 2971 #undef __FUNCT__ 2972 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2973 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2974 { 2975 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) 
A->data; 2976 PetscInt n = A->rmap->n; 2977 PetscInt cstart = A->cmap->rstart; 2978 PetscInt *cmap = mat->garray; 2979 PetscInt *diagIdx, *offdiagIdx; 2980 Vec diagV, offdiagV; 2981 PetscScalar *a, *diagA, *offdiagA; 2982 PetscInt r; 2983 PetscErrorCode ierr; 2984 2985 PetscFunctionBegin; 2986 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2987 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2988 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2989 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2990 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2991 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2992 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2993 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2994 for (r = 0; r < n; ++r) { 2995 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2996 a[r] = diagA[r]; 2997 idx[r] = cstart + diagIdx[r]; 2998 } else { 2999 a[r] = offdiagA[r]; 3000 idx[r] = cmap[offdiagIdx[r]]; 3001 } 3002 } 3003 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3004 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3005 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3006 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3007 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3008 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3009 PetscFunctionReturn(0); 3010 } 3011 3012 #undef __FUNCT__ 3013 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3014 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3015 { 3016 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3017 PetscInt n = A->rmap->n; 3018 PetscInt cstart = A->cmap->rstart; 3019 PetscInt *cmap = mat->garray; 3020 PetscInt *diagIdx, *offdiagIdx; 3021 Vec diagV, offdiagV; 3022 PetscScalar *a, *diagA, *offdiagA; 3023 PetscInt r; 3024 PetscErrorCode ierr; 3025 3026 PetscFunctionBegin; 3027 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3028 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3029 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3030 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3031 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3032 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3033 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3034 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3035 for (r = 0; r < n; ++r) { 3036 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3037 a[r] = diagA[r]; 3038 idx[r] = cstart + diagIdx[r]; 3039 } else { 3040 a[r] = offdiagA[r]; 3041 idx[r] = cmap[offdiagIdx[r]]; 3042 } 3043 } 3044 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3045 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3046 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3047 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3048 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3049 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3050 PetscFunctionReturn(0); 3051 } 3052 3053 #undef __FUNCT__ 3054 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3055 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3056 { 3057 PetscErrorCode ierr; 3058 Mat *dummy; 3059 3060 PetscFunctionBegin; 3061 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3062 *newmat = *dummy; 3063 ierr = PetscFree(dummy);CHKERRQ(ierr); 3064 PetscFunctionReturn(0); 3065 } 3066 3067 extern PetscErrorCode 
MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3068 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3071 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3072 { 3073 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3074 PetscErrorCode ierr; 3075 3076 PetscFunctionBegin; 3077 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3078 PetscFunctionReturn(0); 3079 } 3080 3081 #undef __FUNCT__ 3082 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3083 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3084 { 3085 PetscErrorCode ierr; 3086 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3087 3088 PetscFunctionBegin; 3089 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3090 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3091 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3092 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3093 PetscFunctionReturn(0); 3094 } 3095 3096 /* -------------------------------------------------------------------*/ 3097 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3098 MatGetRow_MPIAIJ, 3099 MatRestoreRow_MPIAIJ, 3100 MatMult_MPIAIJ, 3101 /* 4*/ MatMultAdd_MPIAIJ, 3102 MatMultTranspose_MPIAIJ, 3103 MatMultTransposeAdd_MPIAIJ, 3104 #if defined(PETSC_HAVE_PBGL) 3105 MatSolve_MPIAIJ, 3106 #else 3107 0, 3108 #endif 3109 0, 3110 0, 3111 /*10*/ 0, 3112 0, 3113 0, 3114 MatSOR_MPIAIJ, 3115 MatTranspose_MPIAIJ, 3116 /*15*/ MatGetInfo_MPIAIJ, 3117 MatEqual_MPIAIJ, 3118 MatGetDiagonal_MPIAIJ, 3119 MatDiagonalScale_MPIAIJ, 3120 MatNorm_MPIAIJ, 3121 /*20*/ MatAssemblyBegin_MPIAIJ, 3122 MatAssemblyEnd_MPIAIJ, 3123 MatSetOption_MPIAIJ, 3124 MatZeroEntries_MPIAIJ, 3125 /*24*/ MatZeroRows_MPIAIJ, 3126 0, 3127 #if defined(PETSC_HAVE_PBGL) 3128 0, 3129 #else 3130 0, 3131 #endif 3132 0, 3133 0, 3134 /*29*/ MatSetUp_MPIAIJ, 3135 #if defined(PETSC_HAVE_PBGL) 3136 0, 3137 #else 3138 0, 3139 #endif 3140 0, 3141 0, 3142 0, 3143 /*34*/ MatDuplicate_MPIAIJ, 3144 0, 3145 0, 3146 0, 3147 0, 3148 /*39*/ MatAXPY_MPIAIJ, 3149 MatGetSubMatrices_MPIAIJ, 3150 MatIncreaseOverlap_MPIAIJ, 3151 MatGetValues_MPIAIJ, 3152 MatCopy_MPIAIJ, 3153 /*44*/ MatGetRowMax_MPIAIJ, 3154 MatScale_MPIAIJ, 3155 0, 3156 0, 3157 MatZeroRowsColumns_MPIAIJ, 3158 /*49*/ MatSetRandom_MPIAIJ, 3159 0, 3160 0, 3161 0, 3162 0, 3163 /*54*/ MatFDColoringCreate_MPIAIJ, 3164 0, 3165 MatSetUnfactored_MPIAIJ, 3166 MatPermute_MPIAIJ, 3167 0, 3168 /*59*/ MatGetSubMatrix_MPIAIJ, 3169 MatDestroy_MPIAIJ, 3170 MatView_MPIAIJ, 3171 0, 3172 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3173 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3174 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3175 0, 3176 0, 3177 0, 3178 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3179 MatGetRowMinAbs_MPIAIJ, 3180 0, 3181 MatSetColoring_MPIAIJ, 3182 0, 3183 MatSetValuesAdifor_MPIAIJ, 3184 /*75*/ MatFDColoringApply_AIJ, 3185 0, 3186 0, 3187 0, 3188 MatFindZeroDiagonals_MPIAIJ, 3189 /*80*/ 0, 3190 0, 3191 0, 3192 /*83*/ MatLoad_MPIAIJ, 3193 0, 3194 0, 3195 0, 3196 0, 3197 0, 3198 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3199 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3200 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3201 MatPtAP_MPIAIJ_MPIAIJ, 3202 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3203 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3204 0, 3205 0, 3206 0, 3207 0, 3208 /*99*/ 0, 3209 0, 3210 0, 3211 MatConjugate_MPIAIJ, 3212 0, 3213 /*104*/MatSetValuesRow_MPIAIJ, 3214 MatRealPart_MPIAIJ, 3215 MatImaginaryPart_MPIAIJ, 3216 0, 3217 0, 3218 /*109*/0, 3219 MatGetRedundantMatrix_MPIAIJ, 3220 MatGetRowMin_MPIAIJ, 3221 0, 3222 0, 
3223 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3224 0, 3225 0, 3226 0, 3227 0, 3228 /*119*/0, 3229 0, 3230 0, 3231 0, 3232 MatGetMultiProcBlock_MPIAIJ, 3233 /*124*/MatFindNonzeroRows_MPIAIJ, 3234 MatGetColumnNorms_MPIAIJ, 3235 MatInvertBlockDiagonal_MPIAIJ, 3236 0, 3237 MatGetSubMatricesParallel_MPIAIJ, 3238 /*129*/0, 3239 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3240 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3241 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3242 0, 3243 /*134*/0, 3244 0, 3245 0, 3246 0, 3247 0, 3248 /*139*/0, 3249 0 3250 }; 3251 3252 /* ----------------------------------------------------------------------------------------*/ 3253 3254 #undef __FUNCT__ 3255 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3256 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3257 { 3258 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3259 PetscErrorCode ierr; 3260 3261 PetscFunctionBegin; 3262 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3263 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3264 PetscFunctionReturn(0); 3265 } 3266 3267 #undef __FUNCT__ 3268 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3269 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3270 { 3271 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3272 PetscErrorCode ierr; 3273 3274 PetscFunctionBegin; 3275 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3276 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3277 PetscFunctionReturn(0); 3278 } 3279 3280 #undef __FUNCT__ 3281 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3282 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3283 { 3284 Mat_MPIAIJ *b; 3285 PetscErrorCode ierr; 3286 PetscInt i; 3287 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3288 3289 PetscFunctionBegin; 3290 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3291 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3292 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3293 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3294 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3295 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3296 3297 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3298 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3299 if (d_nnz) { 3300 for (i=0; i<B->rmap->n; i++) { 3301 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3302 } 3303 } 3304 if (o_nnz) { 3305 for (i=0; i<B->rmap->n; i++) { 3306 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3307 } 3308 } 3309 b = (Mat_MPIAIJ*)B->data; 3310 3311 if (!B->preallocated) { 3312 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3313 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3314 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3315 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3316 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3317 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3318 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3319 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3320 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3321 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3322 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3323 } 3324 3325 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3326 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3327 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3328 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3329 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3330 B->preallocated = PETSC_TRUE; 3331 PetscFunctionReturn(0); 3332 } 3333 3334 #undef __FUNCT__ 3335 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3336 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3337 { 3338 Mat mat; 3339 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3340 PetscErrorCode ierr; 3341 3342 PetscFunctionBegin; 3343 *newmat = 0; 3344 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3345 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3346 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3347 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3348 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3349 a = (Mat_MPIAIJ*)mat->data; 3350 3351 mat->factortype = matin->factortype; 3352 mat->rmap->bs = matin->rmap->bs; 3353 mat->cmap->bs = matin->cmap->bs; 3354 mat->assembled = PETSC_TRUE; 3355 mat->insertmode = NOT_SET_VALUES; 3356 mat->preallocated = PETSC_TRUE; 3357 3358 a->size = oldmat->size; 3359 a->rank = oldmat->rank; 3360 a->donotstash = oldmat->donotstash; 3361 a->roworiented = oldmat->roworiented; 3362 a->rowindices = 0; 3363 a->rowvalues = 0; 3364 a->getrowactive = PETSC_FALSE; 3365 3366 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3367 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3368 3369 if (oldmat->colmap) { 3370 #if defined(PETSC_USE_CTABLE) 3371 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3372 #else 3373 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3374 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3375 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3376 #endif 3377 } else a->colmap = 0; 3378 if (oldmat->garray) { 3379 PetscInt len; 3380 len = oldmat->B->cmap->n; 3381 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3382 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3383 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3384 } else a->garray = 0; 3385 3386 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3387 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3388 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3389 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3390 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3391 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3392 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3393 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3394 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3395 *newmat = mat; 3396 PetscFunctionReturn(0); 3397 } 3398 3399 3400 3401 #undef __FUNCT__ 3402 #define __FUNCT__ "MatLoad_MPIAIJ" 3403 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3404 { 3405 PetscScalar *vals,*svals; 3406 MPI_Comm comm; 3407 PetscErrorCode ierr; 3408 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3409 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3410 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3411 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3412 PetscInt cend,cstart,n,*rowners,sizesset=1; 3413 int fd; 3414 PetscInt bs = 1; 3415 3416 PetscFunctionBegin; 3417 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3418 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3419 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3420 if (!rank) { 3421 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3422 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3423 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3424 } 3425 3426 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3427 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3428 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3429 3430 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3431 3432 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3433 M = header[1]; N = header[2]; 3434 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3435 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3436 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3437 3438 /* If global sizes are set, check if they are consistent with that given in the file */ 3439 if (sizesset) { 3440 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3441 } 3442 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3443 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3444 3445 /* determine ownership of all (block) rows */ 3446 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3447 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3448 else m = newMat->rmap->n; /* Set by user */ 3449 3450 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3451 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3452 3453 /* First process needs enough room for process with most rows */ 3454 if (!rank) { 3455 mmax = rowners[1]; 3456 for (i=2; 
i<=size; i++) { 3457 mmax = PetscMax(mmax, rowners[i]); 3458 } 3459 } else mmax = -1; /* unused, but compilers complain */ 3460 3461 rowners[0] = 0; 3462 for (i=2; i<=size; i++) { 3463 rowners[i] += rowners[i-1]; 3464 } 3465 rstart = rowners[rank]; 3466 rend = rowners[rank+1]; 3467 3468 /* distribute row lengths to all processors */ 3469 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3470 if (!rank) { 3471 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3472 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3473 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3474 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3475 for (j=0; j<m; j++) { 3476 procsnz[0] += ourlens[j]; 3477 } 3478 for (i=1; i<size; i++) { 3479 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3480 /* calculate the number of nonzeros on each processor */ 3481 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3482 procsnz[i] += rowlengths[j]; 3483 } 3484 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3485 } 3486 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3487 } else { 3488 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3489 } 3490 3491 if (!rank) { 3492 /* determine max buffer needed and allocate it */ 3493 maxnz = 0; 3494 for (i=0; i<size; i++) { 3495 maxnz = PetscMax(maxnz,procsnz[i]); 3496 } 3497 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3498 3499 /* read in my part of the matrix column indices */ 3500 nz = procsnz[0]; 3501 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3502 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3503 3504 /* read in every one elses and ship off */ 3505 for (i=1; i<size; i++) { 3506 nz = procsnz[i]; 3507 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3508 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3509 } 3510 ierr = PetscFree(cols);CHKERRQ(ierr); 3511 } else { 3512 /* determine buffer space needed for message */ 3513 nz = 0; 3514 for (i=0; i<m; i++) { 3515 nz += ourlens[i]; 3516 } 3517 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3518 3519 /* receive message of column indices*/ 3520 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3521 } 3522 3523 /* determine column ownership if matrix is not square */ 3524 if (N != M) { 3525 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3526 else n = newMat->cmap->n; 3527 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3528 cstart = cend - n; 3529 } else { 3530 cstart = rstart; 3531 cend = rend; 3532 n = cend - cstart; 3533 } 3534 3535 /* loop over local rows, determining number of off diagonal entries */ 3536 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3537 jj = 0; 3538 for (i=0; i<m; i++) { 3539 for (j=0; j<ourlens[i]; j++) { 3540 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3541 jj++; 3542 } 3543 } 3544 3545 for (i=0; i<m; i++) { 3546 ourlens[i] -= offlens[i]; 3547 } 3548 if (!sizesset) { 3549 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3550 } 3551 3552 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3553 3554 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3555 3556 for (i=0; i<m; i++) { 3557 ourlens[i] += offlens[i]; 3558 } 3559 3560 if (!rank) { 3561 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3562 3563 /* 
read in my part of the matrix numerical values */ 3564 nz = procsnz[0]; 3565 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3566 3567 /* insert into matrix */ 3568 jj = rstart; 3569 smycols = mycols; 3570 svals = vals; 3571 for (i=0; i<m; i++) { 3572 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3573 smycols += ourlens[i]; 3574 svals += ourlens[i]; 3575 jj++; 3576 } 3577 3578 /* read in other processors and ship out */ 3579 for (i=1; i<size; i++) { 3580 nz = procsnz[i]; 3581 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3582 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3583 } 3584 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3585 } else { 3586 /* receive numeric values */ 3587 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3588 3589 /* receive message of values*/ 3590 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3591 3592 /* insert into matrix */ 3593 jj = rstart; 3594 smycols = mycols; 3595 svals = vals; 3596 for (i=0; i<m; i++) { 3597 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3598 smycols += ourlens[i]; 3599 svals += ourlens[i]; 3600 jj++; 3601 } 3602 } 3603 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3604 ierr = PetscFree(vals);CHKERRQ(ierr); 3605 ierr = PetscFree(mycols);CHKERRQ(ierr); 3606 ierr = PetscFree(rowners);CHKERRQ(ierr); 3607 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3608 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3609 PetscFunctionReturn(0); 3610 } 3611 3612 #undef __FUNCT__ 3613 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3614 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3615 { 3616 PetscErrorCode ierr; 3617 IS iscol_local; 3618 PetscInt csize; 3619 3620 PetscFunctionBegin; 3621 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3622 if (call == MAT_REUSE_MATRIX) { 3623 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3624 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3625 } else { 3626 PetscInt cbs; 3627 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3628 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3629 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3630 } 3631 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3632 if (call == MAT_INITIAL_MATRIX) { 3633 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3634 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3635 } 3636 PetscFunctionReturn(0); 3637 } 3638 3639 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3640 #undef __FUNCT__ 3641 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3642 /* 3643 Not great since it makes two copies of the submatrix, first an SeqAIJ 3644 in local and then by concatenating the local matrices the end result. 3645 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3646 3647 Note: This requires a sequential iscol with all indices. 
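      As a caller-level sketch (hypothetical local sizes nlocal/clocal and offsets rfirst/cfirst,
      not taken from this file), the call chain starts from the public interface:

         IS  isrow,iscol;
         Mat sub;
         ierr = ISCreateStride(PETSC_COMM_WORLD,nlocal,rfirst,1,&isrow);CHKERRQ(ierr);
         ierr = ISCreateStride(PETSC_COMM_WORLD,clocal,cfirst,1,&iscol);CHKERRQ(ierr);
         ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);

      MatGetSubMatrix_MPIAIJ() above gathers iscol onto every process with ISAllGather() before
      calling this private routine, which is how the "sequential iscol with all indices"
      requirement is satisfied.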
3648 */ 3649 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3650 { 3651 PetscErrorCode ierr; 3652 PetscMPIInt rank,size; 3653 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3654 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3655 PetscBool allcolumns, colflag; 3656 Mat M,Mreuse; 3657 MatScalar *vwork,*aa; 3658 MPI_Comm comm; 3659 Mat_SeqAIJ *aij; 3660 3661 PetscFunctionBegin; 3662 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3663 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3664 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3665 3666 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3667 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3668 if (colflag && ncol == mat->cmap->N) { 3669 allcolumns = PETSC_TRUE; 3670 } else { 3671 allcolumns = PETSC_FALSE; 3672 } 3673 if (call == MAT_REUSE_MATRIX) { 3674 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3675 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3676 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3677 } else { 3678 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3679 } 3680 3681 /* 3682 m - number of local rows 3683 n - number of columns (same on all processors) 3684 rstart - first row in new global matrix generated 3685 */ 3686 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3687 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3688 if (call == MAT_INITIAL_MATRIX) { 3689 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3690 ii = aij->i; 3691 jj = aij->j; 3692 3693 /* 3694 Determine the number of non-zeros in the diagonal and off-diagonal 3695 portions of the matrix in order to do correct preallocation 3696 */ 3697 3698 /* first get start and end of "diagonal" columns */ 3699 if (csize == PETSC_DECIDE) { 3700 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3701 if (mglobal == n) { /* square matrix */ 3702 nlocal = m; 3703 } else { 3704 nlocal = n/size + ((n % size) > rank); 3705 } 3706 } else { 3707 nlocal = csize; 3708 } 3709 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3710 rstart = rend - nlocal; 3711 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3712 3713 /* next, compute all the lengths */ 3714 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3715 olens = dlens + m; 3716 for (i=0; i<m; i++) { 3717 jend = ii[i+1] - ii[i]; 3718 olen = 0; 3719 dlen = 0; 3720 for (j=0; j<jend; j++) { 3721 if (*jj < rstart || *jj >= rend) olen++; 3722 else dlen++; 3723 jj++; 3724 } 3725 olens[i] = olen; 3726 dlens[i] = dlen; 3727 } 3728 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3729 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3730 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3731 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3732 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3733 ierr = PetscFree(dlens);CHKERRQ(ierr); 3734 } else { 3735 PetscInt ml,nl; 3736 3737 M = *newmat; 3738 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3739 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3740 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3741 /* 
3742     The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3743     rather than the slower MatSetValues().
3744   */
3745   M->was_assembled = PETSC_TRUE;
3746   M->assembled     = PETSC_FALSE;
3747   }
3748   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3749   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3750   ii   = aij->i;
3751   jj   = aij->j;
3752   aa   = aij->a;
3753   for (i=0; i<m; i++) {
3754     row   = rstart + i;
3755     nz    = ii[i+1] - ii[i];
3756     cwork = jj; jj += nz;
3757     vwork = aa; aa += nz;
3758     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3759   }
3760
3761   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3762   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3763   *newmat = M;
3764
3765   /* save submatrix used in processor for next request */
3766   if (call == MAT_INITIAL_MATRIX) {
3767     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3768     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3769   }
3770   PetscFunctionReturn(0);
3771 }
3772
3773 #undef __FUNCT__
3774 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3775 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3776 {
3777   PetscInt       m,cstart,cend,j,nnz,i,d;
3778   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3779   const PetscInt *JJ;
3780   PetscScalar    *values;
3781   PetscErrorCode ierr;
3782
3783   PetscFunctionBegin;
3784   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3785
3786   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3787   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3788   m      = B->rmap->n;
3789   cstart = B->cmap->rstart;
3790   cend   = B->cmap->rend;
3791   rstart = B->rmap->rstart;
3792
3793   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3794
3795 #if defined(PETSC_USE_DEBUG)
3796   for (i=0; i<m; i++) {
3797     nnz = Ii[i+1] - Ii[i];
3798     JJ  = J + Ii[i];
3799     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3800     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3801     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3802   }
3803 #endif
3804
3805   for (i=0; i<m; i++) {
3806     nnz     = Ii[i+1] - Ii[i];
3807     JJ      = J + Ii[i];
3808     nnz_max = PetscMax(nnz_max,nnz);
3809     d       = 0;
3810     for (j=0; j<nnz; j++) {
3811       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3812     }
3813     d_nnz[i] = d;
3814     o_nnz[i] = nnz - d;
3815   }
3816   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3817   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3818
3819   if (v) values = (PetscScalar*)v;
3820   else {
3821     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3822     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3823   }
3824
3825   for (i=0; i<m; i++) {
3826     ii   = i + rstart;
3827     nnz  = Ii[i+1] - Ii[i];
3828     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3829 } 3830 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3831 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3832 3833 if (!v) { 3834 ierr = PetscFree(values);CHKERRQ(ierr); 3835 } 3836 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3837 PetscFunctionReturn(0); 3838 } 3839 3840 #undef __FUNCT__ 3841 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3842 /*@ 3843 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3844 (the default parallel PETSc format). 3845 3846 Collective on MPI_Comm 3847 3848 Input Parameters: 3849 + B - the matrix 3850 . i - the indices into j for the start of each local row (starts with zero) 3851 . j - the column indices for each local row (starts with zero) 3852 - v - optional values in the matrix 3853 3854 Level: developer 3855 3856 Notes: 3857 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3858 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3859 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3860 3861 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3862 3863 The format which is used for the sparse matrix input, is equivalent to a 3864 row-major ordering.. i.e for the following matrix, the input data expected is 3865 as shown: 3866 3867 1 0 0 3868 2 0 3 P0 3869 ------- 3870 4 5 6 P1 3871 3872 Process0 [P0]: rows_owned=[0,1] 3873 i = {0,1,3} [size = nrow+1 = 2+1] 3874 j = {0,0,2} [size = nz = 6] 3875 v = {1,2,3} [size = nz = 6] 3876 3877 Process1 [P1]: rows_owned=[2] 3878 i = {0,3} [size = nrow+1 = 1+1] 3879 j = {0,1,2} [size = nz = 6] 3880 v = {4,5,6} [size = nz = 6] 3881 3882 .keywords: matrix, aij, compressed row, sparse, parallel 3883 3884 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3885 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3886 @*/ 3887 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3888 { 3889 PetscErrorCode ierr; 3890 3891 PetscFunctionBegin; 3892 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3893 PetscFunctionReturn(0); 3894 } 3895 3896 #undef __FUNCT__ 3897 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3898 /*@C 3899 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3900 (the default parallel PETSc format). For good matrix assembly performance 3901 the user should preallocate the matrix storage by setting the parameters 3902 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3903 performance can be increased by more than a factor of 50. 3904 3905 Collective on MPI_Comm 3906 3907 Input Parameters: 3908 + A - the matrix 3909 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3910 (same value is used for all local rows) 3911 . d_nnz - array containing the number of nonzeros in the various rows of the 3912 DIAGONAL portion of the local submatrix (possibly different for each row) 3913 or NULL, if d_nz is used to specify the nonzero structure. 3914 The size of this array is equal to the number of local rows, i.e 'm'. 
3915 For matrices that will be factored, you must leave room for (and set) 3916 the diagonal entry even if it is zero. 3917 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3918 submatrix (same value is used for all local rows). 3919 - o_nnz - array containing the number of nonzeros in the various rows of the 3920 OFF-DIAGONAL portion of the local submatrix (possibly different for 3921 each row) or NULL, if o_nz is used to specify the nonzero 3922 structure. The size of this array is equal to the number 3923 of local rows, i.e 'm'. 3924 3925 If the *_nnz parameter is given then the *_nz parameter is ignored 3926 3927 The AIJ format (also called the Yale sparse matrix format or 3928 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3929 storage. The stored row and column indices begin with zero. 3930 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3931 3932 The parallel matrix is partitioned such that the first m0 rows belong to 3933 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3934 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3935 3936 The DIAGONAL portion of the local submatrix of a processor can be defined 3937 as the submatrix which is obtained by extraction the part corresponding to 3938 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3939 first row that belongs to the processor, r2 is the last row belonging to 3940 the this processor, and c1-c2 is range of indices of the local part of a 3941 vector suitable for applying the matrix to. This is an mxn matrix. In the 3942 common case of a square matrix, the row and column ranges are the same and 3943 the DIAGONAL part is also square. The remaining portion of the local 3944 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3945 3946 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3947 3948 You can call MatGetInfo() to get information on how effective the preallocation was; 3949 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3950 You can also run with the option -info and look for messages with the string 3951 malloc in them to see if additional memory allocation was needed. 3952 3953 Example usage: 3954 3955 Consider the following 8x8 matrix with 34 non-zero values, that is 3956 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3957 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3958 as follows: 3959 3960 .vb 3961 1 2 0 | 0 3 0 | 0 4 3962 Proc0 0 5 6 | 7 0 0 | 8 0 3963 9 0 10 | 11 0 0 | 12 0 3964 ------------------------------------- 3965 13 0 14 | 15 16 17 | 0 0 3966 Proc1 0 18 0 | 19 20 21 | 0 0 3967 0 0 0 | 22 23 0 | 24 0 3968 ------------------------------------- 3969 Proc2 25 26 27 | 0 0 28 | 29 0 3970 30 0 0 | 31 32 33 | 0 34 3971 .ve 3972 3973 This can be represented as a collection of submatrices as: 3974 3975 .vb 3976 A B C 3977 D E F 3978 G H I 3979 .ve 3980 3981 Where the submatrices A,B,C are owned by proc0, D,E,F are 3982 owned by proc1, G,H,I are owned by proc2. 3983 3984 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3985 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3986 The 'M','N' parameters are 8,8, and have the same values on all procs. 3987 3988 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3989 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices
3990    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3991    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3992    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3993    matrix, and [DF] as another SeqAIJ matrix.
3994
3995    When d_nz, o_nz parameters are specified, d_nz storage elements are
3996    allocated for every row of the local diagonal submatrix, and o_nz
3997    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3998    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3999    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4000    In this case, the values of d_nz,o_nz are:
4001 .vb
4002      proc0 : d_nz = 2, o_nz = 2
4003      proc1 : d_nz = 3, o_nz = 2
4004      proc2 : d_nz = 1, o_nz = 4
4005 .ve
4006    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4007    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4008    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4009    34 values.
4010
4011    When d_nnz, o_nnz parameters are specified, the storage is specified
4012    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4013    In the above case the values for d_nnz,o_nnz are:
4014 .vb
4015      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4016      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4017      proc2: d_nnz = [1,1] and o_nnz = [4,4]
4018 .ve
4019    Here the space allocated is the sum of all the above values, i.e. 34, and
4020    hence pre-allocation is perfect.
4021
4022    Level: intermediate
4023
4024 .keywords: matrix, aij, compressed row, sparse, parallel
4025
4026 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4027           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4028 @*/
4029 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4030 {
4031   PetscErrorCode ierr;
4032
4033   PetscFunctionBegin;
4034   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4035   PetscValidType(B,1);
4036   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4037   PetscFunctionReturn(0);
4038 }
4039
4040 #undef __FUNCT__
4041 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4042 /*@
4043    MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the
4044    local rows in standard CSR format.
4045
4046    Collective on MPI_Comm
4047
4048    Input Parameters:
4049 +  comm - MPI communicator
4050 .  m - number of local rows (Cannot be PETSC_DECIDE)
4051 .  n - This value should be the same as the local size used in creating the
4052        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4053        calculated if N is given) For square matrices n is almost always m.
4054 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4055 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4056 .  i - row indices
4057 .  j - column indices
4058 -  a - matrix values
4059
4060    Output Parameter:
4061 .  mat - the matrix
4062
4063    Level: intermediate
4064
4065    Notes:
4066    The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4067    thus you CANNOT change the matrix entries by changing the values of a[] after you have
4068    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
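   For example, a minimal call might look as follows (a sketch only: i, j, and a are the local
   CSR arrays described above holding this process's m rows, and the global sizes are left
   for PETSc to compute):
.vb
      Mat mat;
      ierr = MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);CHKERRQ(ierr);
.ve
   Because the arrays are copied, i, j, and a may be freed or reused immediately after this call.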
4069 4070 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4071 4072 The format which is used for the sparse matrix input, is equivalent to a 4073 row-major ordering.. i.e for the following matrix, the input data expected is 4074 as shown: 4075 4076 1 0 0 4077 2 0 3 P0 4078 ------- 4079 4 5 6 P1 4080 4081 Process0 [P0]: rows_owned=[0,1] 4082 i = {0,1,3} [size = nrow+1 = 2+1] 4083 j = {0,0,2} [size = nz = 6] 4084 v = {1,2,3} [size = nz = 6] 4085 4086 Process1 [P1]: rows_owned=[2] 4087 i = {0,3} [size = nrow+1 = 1+1] 4088 j = {0,1,2} [size = nz = 6] 4089 v = {4,5,6} [size = nz = 6] 4090 4091 .keywords: matrix, aij, compressed row, sparse, parallel 4092 4093 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4094 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4095 @*/ 4096 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4097 { 4098 PetscErrorCode ierr; 4099 4100 PetscFunctionBegin; 4101 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4102 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4103 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4104 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4105 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4106 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4107 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4108 PetscFunctionReturn(0); 4109 } 4110 4111 #undef __FUNCT__ 4112 #define __FUNCT__ "MatCreateAIJ" 4113 /*@C 4114 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4115 (the default parallel PETSc format). For good matrix assembly performance 4116 the user should preallocate the matrix storage by setting the parameters 4117 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4118 performance can be increased by more than a factor of 50. 4119 4120 Collective on MPI_Comm 4121 4122 Input Parameters: 4123 + comm - MPI communicator 4124 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4125 This value should be the same as the local size used in creating the 4126 y vector for the matrix-vector product y = Ax. 4127 . n - This value should be the same as the local size used in creating the 4128 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4129 calculated if N is given) For square matrices n is almost always m. 4130 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4131 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4132 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4133 (same value is used for all local rows) 4134 . d_nnz - array containing the number of nonzeros in the various rows of the 4135 DIAGONAL portion of the local submatrix (possibly different for each row) 4136 or NULL, if d_nz is used to specify the nonzero structure. 4137 The size of this array is equal to the number of local rows, i.e 'm'. 4138 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4139 submatrix (same value is used for all local rows). 
4140 - o_nnz - array containing the number of nonzeros in the various rows of the 4141 OFF-DIAGONAL portion of the local submatrix (possibly different for 4142 each row) or NULL, if o_nz is used to specify the nonzero 4143 structure. The size of this array is equal to the number 4144 of local rows, i.e 'm'. 4145 4146 Output Parameter: 4147 . A - the matrix 4148 4149 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4150 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4151 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4152 4153 Notes: 4154 If the *_nnz parameter is given then the *_nz parameter is ignored 4155 4156 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4157 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4158 storage requirements for this matrix. 4159 4160 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4161 processor than it must be used on all processors that share the object for 4162 that argument. 4163 4164 The user MUST specify either the local or global matrix dimensions 4165 (possibly both). 4166 4167 The parallel matrix is partitioned across processors such that the 4168 first m0 rows belong to process 0, the next m1 rows belong to 4169 process 1, the next m2 rows belong to process 2 etc.. where 4170 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4171 values corresponding to [m x N] submatrix. 4172 4173 The columns are logically partitioned with the n0 columns belonging 4174 to 0th partition, the next n1 columns belonging to the next 4175 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4176 4177 The DIAGONAL portion of the local submatrix on any given processor 4178 is the submatrix corresponding to the rows and columns m,n 4179 corresponding to the given processor. i.e diagonal matrix on 4180 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4181 etc. The remaining portion of the local submatrix [m x (N-n)] 4182 constitute the OFF-DIAGONAL portion. The example below better 4183 illustrates this concept. 4184 4185 For a square global matrix we define each processor's diagonal portion 4186 to be its local rows and the corresponding columns (a square submatrix); 4187 each processor's off-diagonal portion encompasses the remainder of the 4188 local matrix (a rectangular submatrix). 4189 4190 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4191 4192 When calling this routine with a single process communicator, a matrix of 4193 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4194 type of communicator, use the construction mechanism: 4195 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4196 4197 By default, this format uses inodes (identical nodes) when possible. 4198 We search for consecutive rows with the same nonzero structure, thereby 4199 reusing matrix information to achieve increased efficiency. 4200 4201 Options Database Keys: 4202 + -mat_no_inode - Do not use inodes 4203 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4204 - -mat_aij_oneindex - Internally use indexing starting at 1 4205 rather than 0. Note that when calling MatSetValues(), 4206 the user still MUST index entries starting at 0! 4207 4208 4209 Example usage: 4210 4211 Consider the following 8x8 matrix with 34 non-zero values, that is 4212 assembled across 3 processors. 
Let's assume that proc0 owns 3 rows,
4213    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4214    as follows:
4215
4216 .vb
4217             1  2  0  |  0  3  0  |  0  4
4218    Proc0    0  5  6  |  7  0  0  |  8  0
4219             9  0 10  | 11  0  0  | 12  0
4220    -------------------------------------
4221            13  0 14  | 15 16 17  |  0  0
4222    Proc1    0 18  0  | 19 20 21  |  0  0
4223             0  0  0  | 22 23  0  | 24  0
4224    -------------------------------------
4225    Proc2   25 26 27  |  0  0 28  | 29  0
4226            30  0  0  | 31 32 33  |  0 34
4227 .ve
4228
4229    This can be represented as a collection of submatrices as:
4230
4231 .vb
4232       A B C
4233       D E F
4234       G H I
4235 .ve
4236
4237    Where the submatrices A,B,C are owned by proc0, D,E,F are
4238    owned by proc1, G,H,I are owned by proc2.
4239
4240    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4241    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4242    The 'M','N' parameters are 8,8, and have the same values on all procs.
4243
4244    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4245    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4246    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4247    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4248    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4249    matrix, and [DF] as another SeqAIJ matrix.
4250
4251    When d_nz, o_nz parameters are specified, d_nz storage elements are
4252    allocated for every row of the local diagonal submatrix, and o_nz
4253    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4254    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4255    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4256    In this case, the values of d_nz,o_nz are:
4257 .vb
4258      proc0 : d_nz = 2, o_nz = 2
4259      proc1 : d_nz = 3, o_nz = 2
4260      proc2 : d_nz = 1, o_nz = 4
4261 .ve
4262    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4263    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4264    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4265    34 values.
4266
4267    When d_nnz, o_nnz parameters are specified, the storage is specified
4268    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4269    In the above case the values for d_nnz,o_nnz are:
4270 .vb
4271      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4272      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4273      proc2: d_nnz = [1,1] and o_nnz = [4,4]
4274 .ve
4275    Here the space allocated is the sum of all the above values, i.e. 34, and
4276    hence pre-allocation is perfect.
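   As a sketch of the recommended MatCreate()/MatSetFromOptions()/MatXXXXSetPreallocation()
   sequence mentioned above (using the same m,n,M,N and d_nz,d_nnz,o_nz,o_nnz arguments as in
   this routine; the preallocation call that does not match the final matrix type is ignored):
.vb
      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,d_nz,d_nnz);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve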
4277 4278 Level: intermediate 4279 4280 .keywords: matrix, aij, compressed row, sparse, parallel 4281 4282 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4283 MPIAIJ, MatCreateMPIAIJWithArrays() 4284 @*/ 4285 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4286 { 4287 PetscErrorCode ierr; 4288 PetscMPIInt size; 4289 4290 PetscFunctionBegin; 4291 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4292 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4293 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4294 if (size > 1) { 4295 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4296 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4297 } else { 4298 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4299 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4300 } 4301 PetscFunctionReturn(0); 4302 } 4303 4304 #undef __FUNCT__ 4305 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4306 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4307 { 4308 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4309 4310 PetscFunctionBegin; 4311 *Ad = a->A; 4312 *Ao = a->B; 4313 *colmap = a->garray; 4314 PetscFunctionReturn(0); 4315 } 4316 4317 #undef __FUNCT__ 4318 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4319 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4320 { 4321 PetscErrorCode ierr; 4322 PetscInt i; 4323 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4324 4325 PetscFunctionBegin; 4326 if (coloring->ctype == IS_COLORING_GLOBAL) { 4327 ISColoringValue *allcolors,*colors; 4328 ISColoring ocoloring; 4329 4330 /* set coloring for diagonal portion */ 4331 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4332 4333 /* set coloring for off-diagonal portion */ 4334 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4335 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4336 for (i=0; i<a->B->cmap->n; i++) { 4337 colors[i] = allcolors[a->garray[i]]; 4338 } 4339 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4340 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4341 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4342 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4343 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4344 ISColoringValue *colors; 4345 PetscInt *larray; 4346 ISColoring ocoloring; 4347 4348 /* set coloring for diagonal portion */ 4349 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4350 for (i=0; i<a->A->cmap->n; i++) { 4351 larray[i] = i + A->cmap->rstart; 4352 } 4353 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4354 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4355 for (i=0; i<a->A->cmap->n; i++) { 4356 colors[i] = coloring->colors[larray[i]]; 4357 } 4358 ierr = PetscFree(larray);CHKERRQ(ierr); 4359 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4360 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4361 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4362 4363 /* set coloring for off-diagonal portion */ 4364 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4365 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4366 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4367 for (i=0; i<a->B->cmap->n; i++) { 4368 colors[i] = coloring->colors[larray[i]]; 4369 } 4370 ierr = PetscFree(larray);CHKERRQ(ierr); 4371 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4372 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4373 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4374 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4375 PetscFunctionReturn(0); 4376 } 4377 4378 #undef __FUNCT__ 4379 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4380 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4381 { 4382 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4383 PetscErrorCode ierr; 4384 4385 PetscFunctionBegin; 4386 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4387 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4388 PetscFunctionReturn(0); 4389 } 4390 4391 #undef __FUNCT__ 4392 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4393 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4394 { 4395 PetscErrorCode ierr; 4396 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4397 PetscInt *indx; 4398 4399 PetscFunctionBegin; 4400 /* This routine will ONLY return MPIAIJ type matrix */ 4401 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4402 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4403 if (n == PETSC_DECIDE) { 4404 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4405 } 4406 /* Check sum(n) = N */ 4407 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4408 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4409 4410 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4411 rstart -= m; 4412 4413 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4414 for (i=0; i<m; i++) { 4415 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4416 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4417 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4418 } 4419 4420 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4421 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4422 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4423 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4424 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4425 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4426 PetscFunctionReturn(0); 4427 } 4428 4429 #undef __FUNCT__ 4430 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4431 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4432 { 4433 PetscErrorCode ierr; 4434 PetscInt m,N,i,rstart,nnz,Ii; 4435 PetscInt *indx; 4436 PetscScalar *values; 4437 4438 PetscFunctionBegin; 4439 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4440 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4441 for (i=0; i<m; i++) { 4442 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4443 Ii = i + rstart; 4444 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4445 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4446 } 4447 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4448 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4449 PetscFunctionReturn(0); 4450 } 4451 4452 #undef __FUNCT__ 4453 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4454 /*@ 4455 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4456 matrices from each processor 4457 4458 Collective on MPI_Comm 4459 4460 Input Parameters: 4461 + comm - the communicators the parallel matrix will live on 4462 . inmat - the input sequential matrices 4463 . n - number of local columns (or PETSC_DECIDE) 4464 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4465 4466 Output Parameter: 4467 . outmat - the parallel matrix generated 4468 4469 Level: advanced 4470 4471 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4472 4473 @*/ 4474 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4475 { 4476 PetscErrorCode ierr; 4477 4478 PetscFunctionBegin; 4479 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4480 if (scall == MAT_INITIAL_MATRIX) { 4481 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4482 } 4483 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4484 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4485 PetscFunctionReturn(0); 4486 } 4487 4488 #undef __FUNCT__ 4489 #define __FUNCT__ "MatFileSplit" 4490 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4491 { 4492 PetscErrorCode ierr; 4493 PetscMPIInt rank; 4494 PetscInt m,N,i,rstart,nnz; 4495 size_t len; 4496 const PetscInt *indx; 4497 PetscViewer out; 4498 char *name; 4499 Mat B; 4500 const PetscScalar *values; 4501 4502 PetscFunctionBegin; 4503 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4504 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4505 /* Should this be the type of the diagonal block of A? 
*/ 4506 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4507 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4508 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4509 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4510 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4511 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4512 for (i=0; i<m; i++) { 4513 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4514 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4515 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4516 } 4517 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4518 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4519 4520 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4521 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4522 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4523 sprintf(name,"%s.%d",outfile,rank); 4524 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4525 ierr = PetscFree(name);CHKERRQ(ierr); 4526 ierr = MatView(B,out);CHKERRQ(ierr); 4527 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4528 ierr = MatDestroy(&B);CHKERRQ(ierr); 4529 PetscFunctionReturn(0); 4530 } 4531 4532 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4533 #undef __FUNCT__ 4534 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4535 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4536 { 4537 PetscErrorCode ierr; 4538 Mat_Merge_SeqsToMPI *merge; 4539 PetscContainer container; 4540 4541 PetscFunctionBegin; 4542 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4543 if (container) { 4544 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4545 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4546 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4547 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4548 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4549 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4550 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4551 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4552 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4553 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4554 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4555 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4556 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4557 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4558 ierr = PetscFree(merge);CHKERRQ(ierr); 4559 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4560 } 4561 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4562 PetscFunctionReturn(0); 4563 } 4564 4565 #include <../src/mat/utils/freespace.h> 4566 #include <petscbt.h> 4567 4568 #undef __FUNCT__ 4569 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4570 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4571 { 4572 PetscErrorCode ierr; 4573 MPI_Comm comm; 4574 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4575 PetscMPIInt size,rank,taga,*len_s; 4576 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4577 PetscInt proc,m; 4578 PetscInt **buf_ri,**buf_rj; 4579 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4580 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4581 MPI_Request *s_waits,*r_waits; 4582 MPI_Status *status; 4583 MatScalar *aa=a->a; 4584 MatScalar **abuf_r,*ba_i; 4585 Mat_Merge_SeqsToMPI *merge; 4586 PetscContainer container; 4587 4588 PetscFunctionBegin; 4589 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4590 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4591 4592 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4593 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4594 4595 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4596 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4597 4598 bi = merge->bi; 4599 bj = merge->bj; 4600 buf_ri = merge->buf_ri; 4601 buf_rj = merge->buf_rj; 4602 4603 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4604 owners = merge->rowmap->range; 4605 len_s = merge->len_s; 4606 4607 /* send and recv matrix values */ 4608 /*-----------------------------*/ 4609 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4610 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4611 4612 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4613 for (proc=0,k=0; proc<size; proc++) { 4614 if (!len_s[proc]) continue; 4615 i = owners[proc]; 4616 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4617 k++; 4618 } 4619 4620 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4621 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4622 ierr = PetscFree(status);CHKERRQ(ierr); 4623 4624 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4625 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4626 4627 /* insert mat values of mpimat */ 4628 /*----------------------------*/ 4629 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4630 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4631 4632 for (k=0; k<merge->nrecv; k++) { 4633 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4634 nrows = *(buf_ri_k[k]); 4635 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4636 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4637 } 4638 4639 /* set values of ba */ 4640 m = merge->rowmap->n; 4641 for (i=0; i<m; i++) { 4642 arow = owners[rank] + i; 4643 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4644 bnzi = bi[i+1] - bi[i]; 4645 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4646 4647 /* add local non-zero vals of this proc's seqmat into ba */ 4648 anzi = ai[arow+1] - ai[arow]; 4649 aj = a->j + ai[arow]; 4650 aa = a->a + ai[arow]; 4651 nextaj = 0; 4652 for (j=0; nextaj<anzi; j++) { 4653 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4654 ba_i[j] += aa[nextaj++]; 4655 } 4656 } 4657 4658 /* add received vals into ba */ 4659 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4660 /* i-th row */ 4661 if (i == *nextrow[k]) { 4662 anzi = *(nextai[k]+1) - *nextai[k]; 4663 aj = buf_rj[k] + *(nextai[k]); 4664 aa = abuf_r[k] + *(nextai[k]); 4665 nextaj = 0; 4666 for (j=0; nextaj<anzi; j++) { 4667 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4668 ba_i[j] += aa[nextaj++]; 4669 } 4670 } 4671 nextrow[k]++; nextai[k]++; 4672 } 4673 } 4674 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4675 } 4676 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4677 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4678 4679 ierr = 
PetscFree(abuf_r[0]);CHKERRQ(ierr); 4680 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4681 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4682 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4683 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4684 PetscFunctionReturn(0); 4685 } 4686 4687 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4688 4689 #undef __FUNCT__ 4690 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4691 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4692 { 4693 PetscErrorCode ierr; 4694 Mat B_mpi; 4695 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4696 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4697 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4698 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4699 PetscInt len,proc,*dnz,*onz,bs,cbs; 4700 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4701 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4702 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4703 MPI_Status *status; 4704 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4705 PetscBT lnkbt; 4706 Mat_Merge_SeqsToMPI *merge; 4707 PetscContainer container; 4708 4709 PetscFunctionBegin; 4710 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4711 4712 /* make sure it is a PETSc comm */ 4713 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4714 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4715 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4716 4717 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4718 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4719 4720 /* determine row ownership */ 4721 /*---------------------------------------------------------*/ 4722 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4723 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4724 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4725 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4726 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4727 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4728 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4729 4730 m = merge->rowmap->n; 4731 owners = merge->rowmap->range; 4732 4733 /* determine the number of messages to send, their lengths */ 4734 /*---------------------------------------------------------*/ 4735 len_s = merge->len_s; 4736 4737 len = 0; /* length of buf_si[] */ 4738 merge->nsend = 0; 4739 for (proc=0; proc<size; proc++) { 4740 len_si[proc] = 0; 4741 if (proc == rank) { 4742 len_s[proc] = 0; 4743 } else { 4744 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4745 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4746 } 4747 if (len_s[proc]) { 4748 merge->nsend++; 4749 nrows = 0; 4750 for (i=owners[proc]; i<owners[proc+1]; i++) { 4751 if (ai[i+1] > ai[i]) nrows++; 4752 } 4753 len_si[proc] = 2*(nrows+1); 4754 len += len_si[proc]; 4755 } 4756 } 4757 4758 /* determine the number and length of messages to receive for ij-structure */ 4759 /*-------------------------------------------------------------------------*/ 4760 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4761 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4762 4763 /* post the Irecv of j-structure */ 4764 
/*-------------------------------*/ 4765 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4766 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4767 4768 /* post the Isend of j-structure */ 4769 /*--------------------------------*/ 4770 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4771 4772 for (proc=0, k=0; proc<size; proc++) { 4773 if (!len_s[proc]) continue; 4774 i = owners[proc]; 4775 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4776 k++; 4777 } 4778 4779 /* receives and sends of j-structure are complete */ 4780 /*------------------------------------------------*/ 4781 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4782 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4783 4784 /* send and recv i-structure */ 4785 /*---------------------------*/ 4786 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4787 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4788 4789 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4790 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4791 for (proc=0,k=0; proc<size; proc++) { 4792 if (!len_s[proc]) continue; 4793 /* form outgoing message for i-structure: 4794 buf_si[0]: nrows to be sent 4795 [1:nrows]: row index (global) 4796 [nrows+1:2*nrows+1]: i-structure index 4797 */ 4798 /*-------------------------------------------*/ 4799 nrows = len_si[proc]/2 - 1; 4800 buf_si_i = buf_si + nrows+1; 4801 buf_si[0] = nrows; 4802 buf_si_i[0] = 0; 4803 nrows = 0; 4804 for (i=owners[proc]; i<owners[proc+1]; i++) { 4805 anzi = ai[i+1] - ai[i]; 4806 if (anzi) { 4807 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4808 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4809 nrows++; 4810 } 4811 } 4812 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4813 k++; 4814 buf_si += len_si[proc]; 4815 } 4816 4817 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4818 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4819 4820 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4821 for (i=0; i<merge->nrecv; i++) { 4822 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4823 } 4824 4825 ierr = PetscFree(len_si);CHKERRQ(ierr); 4826 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4827 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4828 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4829 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4830 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4831 ierr = PetscFree(status);CHKERRQ(ierr); 4832 4833 /* compute a local seq matrix in each processor */ 4834 /*----------------------------------------------*/ 4835 /* allocate bi array and free space for accumulating nonzero column info */ 4836 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4837 bi[0] = 0; 4838 4839 /* create and initialize a linked list */ 4840 nlnk = N+1; 4841 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4842 4843 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4844 len = ai[owners[rank+1]] - ai[owners[rank]]; 4845 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4846 4847 
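  /* current_space tracks the head of the free-space linked list used below: for each local row,
     the sorted column indices of this process's seqmat row and of all received rows for it are
     merged through the linked list lnk (duplicates dropped), the merged list is copied into the
     free space, and its length is recorded in bi[]; PetscFreeSpaceGet() is called again whenever
     a merged row does not fit in the space remaining */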
current_space = free_space; 4848 4849 /* determine symbolic info for each local row */ 4850 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4851 4852 for (k=0; k<merge->nrecv; k++) { 4853 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4854 nrows = *buf_ri_k[k]; 4855 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4856 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4857 } 4858 4859 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4860 len = 0; 4861 for (i=0; i<m; i++) { 4862 bnzi = 0; 4863 /* add local non-zero cols of this proc's seqmat into lnk */ 4864 arow = owners[rank] + i; 4865 anzi = ai[arow+1] - ai[arow]; 4866 aj = a->j + ai[arow]; 4867 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4868 bnzi += nlnk; 4869 /* add received col data into lnk */ 4870 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4871 if (i == *nextrow[k]) { /* i-th row */ 4872 anzi = *(nextai[k]+1) - *nextai[k]; 4873 aj = buf_rj[k] + *nextai[k]; 4874 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4875 bnzi += nlnk; 4876 nextrow[k]++; nextai[k]++; 4877 } 4878 } 4879 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4880 4881 /* if free space is not available, make more free space */ 4882 if (current_space->local_remaining<bnzi) { 4883 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4884 nspacedouble++; 4885 } 4886 /* copy data into free space, then initialize lnk */ 4887 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4888 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4889 4890 current_space->array += bnzi; 4891 current_space->local_used += bnzi; 4892 current_space->local_remaining -= bnzi; 4893 4894 bi[i+1] = bi[i] + bnzi; 4895 } 4896 4897 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4898 4899 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4900 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4901 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4902 4903 /* create symbolic parallel matrix B_mpi */ 4904 /*---------------------------------------*/ 4905 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4906 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4907 if (n==PETSC_DECIDE) { 4908 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4909 } else { 4910 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4911 } 4912 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4913 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4914 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4915 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4916 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4917 4918 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4919 B_mpi->assembled = PETSC_FALSE; 4920 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4921 merge->bi = bi; 4922 merge->bj = bj; 4923 merge->buf_ri = buf_ri; 4924 merge->buf_rj = buf_rj; 4925 merge->coi = NULL; 4926 merge->coj = NULL; 4927 merge->owners_co = NULL; 4928 4929 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4930 4931 /* attach the supporting struct to B_mpi for reuse */ 4932 ierr = 
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4933 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4934 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4935 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4936 *mpimat = B_mpi; 4937 4938 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4939 PetscFunctionReturn(0); 4940 } 4941 4942 #undef __FUNCT__ 4943 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4944 /*@C 4945 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4946 matrices from each processor 4947 4948 Collective on MPI_Comm 4949 4950 Input Parameters: 4951 + comm - the communicators the parallel matrix will live on 4952 . seqmat - the input sequential matrices 4953 . m - number of local rows (or PETSC_DECIDE) 4954 . n - number of local columns (or PETSC_DECIDE) 4955 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4956 4957 Output Parameter: 4958 . mpimat - the parallel matrix generated 4959 4960 Level: advanced 4961 4962 Notes: 4963 The dimensions of the sequential matrix in each processor MUST be the same. 4964 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4965 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4966 @*/ 4967 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4968 { 4969 PetscErrorCode ierr; 4970 PetscMPIInt size; 4971 4972 PetscFunctionBegin; 4973 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4974 if (size == 1) { 4975 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4976 if (scall == MAT_INITIAL_MATRIX) { 4977 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4978 } else { 4979 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4980 } 4981 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4982 PetscFunctionReturn(0); 4983 } 4984 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4985 if (scall == MAT_INITIAL_MATRIX) { 4986 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4987 } 4988 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4989 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4990 PetscFunctionReturn(0); 4991 } 4992 4993 #undef __FUNCT__ 4994 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4995 /*@ 4996 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4997 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4998 with MatGetSize() 4999 5000 Not Collective 5001 5002 Input Parameters: 5003 + A - the matrix 5004 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5005 5006 Output Parameter: 5007 . 
A_loc - the local sequential matrix generated 5008 5009 Level: developer 5010 5011 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5012 5013 @*/ 5014 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5015 { 5016 PetscErrorCode ierr; 5017 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5018 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5019 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5020 MatScalar *aa=a->a,*ba=b->a,*cam; 5021 PetscScalar *ca; 5022 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5023 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5024 PetscBool match; 5025 5026 PetscFunctionBegin; 5027 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5028 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5029 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5030 if (scall == MAT_INITIAL_MATRIX) { 5031 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5032 ci[0] = 0; 5033 for (i=0; i<am; i++) { 5034 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5035 } 5036 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5037 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5038 k = 0; 5039 for (i=0; i<am; i++) { 5040 ncols_o = bi[i+1] - bi[i]; 5041 ncols_d = ai[i+1] - ai[i]; 5042 /* off-diagonal portion of A */ 5043 for (jo=0; jo<ncols_o; jo++) { 5044 col = cmap[*bj]; 5045 if (col >= cstart) break; 5046 cj[k] = col; bj++; 5047 ca[k++] = *ba++; 5048 } 5049 /* diagonal portion of A */ 5050 for (j=0; j<ncols_d; j++) { 5051 cj[k] = cstart + *aj++; 5052 ca[k++] = *aa++; 5053 } 5054 /* off-diagonal portion of A */ 5055 for (j=jo; j<ncols_o; j++) { 5056 cj[k] = cmap[*bj++]; 5057 ca[k++] = *ba++; 5058 } 5059 } 5060 /* put together the new matrix */ 5061 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5062 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5063 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5064 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5065 mat->free_a = PETSC_TRUE; 5066 mat->free_ij = PETSC_TRUE; 5067 mat->nonew = 0; 5068 } else if (scall == MAT_REUSE_MATRIX) { 5069 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5070 ci = mat->i; cj = mat->j; cam = mat->a; 5071 for (i=0; i<am; i++) { 5072 /* off-diagonal portion of A */ 5073 ncols_o = bi[i+1] - bi[i]; 5074 for (jo=0; jo<ncols_o; jo++) { 5075 col = cmap[*bj]; 5076 if (col >= cstart) break; 5077 *cam++ = *ba++; bj++; 5078 } 5079 /* diagonal portion of A */ 5080 ncols_d = ai[i+1] - ai[i]; 5081 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5082 /* off-diagonal portion of A */ 5083 for (j=jo; j<ncols_o; j++) { 5084 *cam++ = *ba++; bj++; 5085 } 5086 } 5087 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5088 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5089 PetscFunctionReturn(0); 5090 } 5091 5092 #undef __FUNCT__ 5093 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5094 /*@C 5095 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5096 5097 Not Collective 5098 5099 Input Parameters: 5100 + A - the matrix 5101 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5102 - row, col - index sets of rows and columns to extract (or NULL) 5103 5104 Output Parameter: 5105 . 
A_loc - the local sequential matrix generated 5106 5107 Level: developer 5108 5109 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5110 5111 @*/ 5112 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5113 { 5114 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5115 PetscErrorCode ierr; 5116 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5117 IS isrowa,iscola; 5118 Mat *aloc; 5119 PetscBool match; 5120 5121 PetscFunctionBegin; 5122 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5123 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5124 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5125 if (!row) { 5126 start = A->rmap->rstart; end = A->rmap->rend; 5127 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5128 } else { 5129 isrowa = *row; 5130 } 5131 if (!col) { 5132 start = A->cmap->rstart; 5133 cmap = a->garray; 5134 nzA = a->A->cmap->n; 5135 nzB = a->B->cmap->n; 5136 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5137 ncols = 0; 5138 for (i=0; i<nzB; i++) { 5139 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5140 else break; 5141 } 5142 imark = i; 5143 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5144 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5145 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5146 } else { 5147 iscola = *col; 5148 } 5149 if (scall != MAT_INITIAL_MATRIX) { 5150 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5151 aloc[0] = *A_loc; 5152 } 5153 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5154 *A_loc = aloc[0]; 5155 ierr = PetscFree(aloc);CHKERRQ(ierr); 5156 if (!row) { 5157 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5158 } 5159 if (!col) { 5160 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5161 } 5162 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5163 PetscFunctionReturn(0); 5164 } 5165 5166 #undef __FUNCT__ 5167 #define __FUNCT__ "MatGetBrowsOfAcols" 5168 /*@C 5169 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5170 5171 Collective on Mat 5172 5173 Input Parameters: 5174 + A,B - the matrices in mpiaij format 5175 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5176 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5177 5178 Output Parameter: 5179 + rowb, colb - index sets of rows and columns of B to extract 5180 - B_seq - the sequential matrix generated 5181 5182 Level: developer 5183 5184 @*/ 5185 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5186 { 5187 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5188 PetscErrorCode ierr; 5189 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5190 IS isrowb,iscolb; 5191 Mat *bseq=NULL; 5192 5193 PetscFunctionBegin; 5194 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5195 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5196 } 5197 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5198 5199 if (scall == MAT_INITIAL_MATRIX) { 5200 start = A->cmap->rstart; 5201 cmap = a->garray; 5202 nzA = a->A->cmap->n; 5203 nzB = a->B->cmap->n; 5204 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5205 ncols = 0; 5206 for (i=0; i<nzB; i++) { /* row < local row index */ 5207 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5208 else break; 5209 } 5210 imark = i; 5211 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5212 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5213 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5214 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5215 } else { 5216 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5217 isrowb = *rowb; iscolb = *colb; 5218 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5219 bseq[0] = *B_seq; 5220 } 5221 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5222 *B_seq = bseq[0]; 5223 ierr = PetscFree(bseq);CHKERRQ(ierr); 5224 if (!rowb) { 5225 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5226 } else { 5227 *rowb = isrowb; 5228 } 5229 if (!colb) { 5230 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5231 } else { 5232 *colb = iscolb; 5233 } 5234 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5235 PetscFunctionReturn(0); 5236 } 5237 5238 #undef __FUNCT__ 5239 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5240 /* 5241 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5242 of the OFF-DIAGONAL portion of local A 5243 5244 Collective on Mat 5245 5246 Input Parameters: 5247 + A,B - the matrices in mpiaij format 5248 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5249 5250 Output Parameter: 5251 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5252 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5253 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5254 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5255 5256 Level: developer 5257 5258 */ 5259 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5260 { 5261 VecScatter_MPI_General *gen_to,*gen_from; 5262 PetscErrorCode ierr; 5263 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5264 Mat_SeqAIJ *b_oth; 5265 VecScatter ctx =a->Mvctx; 5266 MPI_Comm comm; 5267 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5268 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5269 PetscScalar *rvalues,*svalues; 5270 MatScalar *b_otha,*bufa,*bufA; 5271 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5272 MPI_Request *rwaits = NULL,*swaits = NULL; 5273 MPI_Status *sstatus,rstatus; 5274 PetscMPIInt jj; 5275 PetscInt *cols,sbs,rbs; 5276 PetscScalar *vals; 5277 5278 PetscFunctionBegin; 5279 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5280 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5281 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5282 } 5283 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5284 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5285 5286 gen_to = (VecScatter_MPI_General*)ctx->todata; 5287 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5288 rvalues = gen_from->values; /* holds the length of receiving row */ 5289 svalues = gen_to->values; /* holds the length of sending row */ 5290 nrecvs = gen_from->n; 5291 nsends = gen_to->n; 5292 5293 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5294 srow = gen_to->indices; /* local row index to be sent */ 5295 sstarts = gen_to->starts; 5296 sprocs = gen_to->procs; 5297 sstatus = gen_to->sstatus; 5298 sbs = gen_to->bs; 5299 rstarts = gen_from->starts; 5300 rprocs = gen_from->procs; 5301 rbs = gen_from->bs; 5302 5303 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5304 if (scall == MAT_INITIAL_MATRIX) { 5305 /* i-array */ 5306 /*---------*/ 5307 /* post receives */ 5308 for (i=0; i<nrecvs; i++) { 5309 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5310 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5311 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5312 } 5313 5314 /* pack the outgoing message */ 5315 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5316 5317 sstartsj[0] = 0; 5318 rstartsj[0] = 0; 5319 len = 0; /* total length of j or a array to be sent */ 5320 k = 0; 5321 for (i=0; i<nsends; i++) { 5322 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5323 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5324 for (j=0; j<nrows; j++) { 5325 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5326 for (l=0; l<sbs; l++) { 5327 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5328 5329 rowlen[j*sbs+l] = ncols; 5330 5331 len += ncols; 5332 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5333 } 5334 k++; 5335 } 5336 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5337 5338 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in 
bufj and bufa */ 5339 } 5340 /* recvs and sends of i-array are completed */ 5341 i = nrecvs; 5342 while (i--) { 5343 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5344 } 5345 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5346 5347 /* allocate buffers for sending j and a arrays */ 5348 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5349 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5350 5351 /* create i-array of B_oth */ 5352 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5353 5354 b_othi[0] = 0; 5355 len = 0; /* total length of j or a array to be received */ 5356 k = 0; 5357 for (i=0; i<nrecvs; i++) { 5358 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5359 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5360 for (j=0; j<nrows; j++) { 5361 b_othi[k+1] = b_othi[k] + rowlen[j]; 5362 len += rowlen[j]; k++; 5363 } 5364 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5365 } 5366 5367 /* allocate space for j and a arrrays of B_oth */ 5368 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5369 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5370 5371 /* j-array */ 5372 /*---------*/ 5373 /* post receives of j-array */ 5374 for (i=0; i<nrecvs; i++) { 5375 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5376 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5377 } 5378 5379 /* pack the outgoing message j-array */ 5380 k = 0; 5381 for (i=0; i<nsends; i++) { 5382 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5383 bufJ = bufj+sstartsj[i]; 5384 for (j=0; j<nrows; j++) { 5385 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5386 for (ll=0; ll<sbs; ll++) { 5387 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5388 for (l=0; l<ncols; l++) { 5389 *bufJ++ = cols[l]; 5390 } 5391 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5392 } 5393 } 5394 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5395 } 5396 5397 /* recvs and sends of j-array are completed */ 5398 i = nrecvs; 5399 while (i--) { 5400 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5401 } 5402 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5403 } else if (scall == MAT_REUSE_MATRIX) { 5404 sstartsj = *startsj_s; 5405 rstartsj = *startsj_r; 5406 bufa = *bufa_ptr; 5407 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5408 b_otha = b_oth->a; 5409 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5410 5411 /* a-array */ 5412 /*---------*/ 5413 /* post receives of a-array */ 5414 for (i=0; i<nrecvs; i++) { 5415 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5416 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5417 } 5418 5419 /* pack the outgoing message a-array */ 5420 k = 0; 5421 for (i=0; i<nsends; i++) { 5422 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5423 bufA = bufa+sstartsj[i]; 5424 for (j=0; j<nrows; j++) { 5425 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5426 for (ll=0; ll<sbs; ll++) { 5427 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5428 for (l=0; l<ncols; l++) { 5429 *bufA++ = vals[l]; 5430 } 5431 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5432 } 5433 } 5434 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5435 } 5436 /* recvs and sends of a-array are completed */ 5437 i = nrecvs; 5438 while (i--) { 5439 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5440 } 5441 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5442 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5443 5444 if (scall == MAT_INITIAL_MATRIX) { 5445 /* put together the new matrix */ 5446 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5447 5448 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5449 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5450 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5451 b_oth->free_a = PETSC_TRUE; 5452 b_oth->free_ij = PETSC_TRUE; 5453 b_oth->nonew = 0; 5454 5455 ierr = PetscFree(bufj);CHKERRQ(ierr); 5456 if (!startsj_s || !bufa_ptr) { 5457 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5458 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5459 } else { 5460 *startsj_s = sstartsj; 5461 *startsj_r = rstartsj; 5462 *bufa_ptr = bufa; 5463 } 5464 } 5465 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5466 PetscFunctionReturn(0); 5467 } 5468 5469 #undef __FUNCT__ 5470 #define __FUNCT__ "MatGetCommunicationStructs" 5471 /*@C 5472 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5473 5474 Not Collective 5475 5476 Input Parameters: 5477 . A - The matrix in mpiaij format 5478 5479 Output Parameter: 5480 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5481 . 
colmap - A map from global column index to local index into lvec 5482 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5483 5484 Level: developer 5485 5486 @*/ 5487 #if defined(PETSC_USE_CTABLE) 5488 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5489 #else 5490 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5491 #endif 5492 { 5493 Mat_MPIAIJ *a; 5494 5495 PetscFunctionBegin; 5496 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5497 PetscValidPointer(lvec, 2); 5498 PetscValidPointer(colmap, 3); 5499 PetscValidPointer(multScatter, 4); 5500 a = (Mat_MPIAIJ*) A->data; 5501 if (lvec) *lvec = a->lvec; 5502 if (colmap) *colmap = a->colmap; 5503 if (multScatter) *multScatter = a->Mvctx; 5504 PetscFunctionReturn(0); 5505 } 5506 5507 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5508 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5509 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5510 5511 #undef __FUNCT__ 5512 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5513 /* 5514 Computes (B'*A')' since computing B*A directly is untenable 5515 5516 n p p 5517 ( ) ( ) ( ) 5518 m ( A ) * n ( B ) = m ( C ) 5519 ( ) ( ) ( ) 5520 5521 */ 5522 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5523 { 5524 PetscErrorCode ierr; 5525 Mat At,Bt,Ct; 5526 5527 PetscFunctionBegin; 5528 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5529 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5530 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5531 ierr = MatDestroy(&At);CHKERRQ(ierr); 5532 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5533 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5534 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5535 PetscFunctionReturn(0); 5536 } 5537 5538 #undef __FUNCT__ 5539 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5540 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5541 { 5542 PetscErrorCode ierr; 5543 PetscInt m=A->rmap->n,n=B->cmap->n; 5544 Mat Cmat; 5545 5546 PetscFunctionBegin; 5547 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5548 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5549 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5550 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5551 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5552 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5553 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5554 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5555 5556 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5557 5558 *C = Cmat; 5559 PetscFunctionReturn(0); 5560 } 5561 5562 /* ----------------------------------------------------------------*/ 5563 #undef __FUNCT__ 5564 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5565 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5566 { 5567 PetscErrorCode ierr; 5568 5569 PetscFunctionBegin; 5570 if (scall == MAT_INITIAL_MATRIX) { 5571 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5572 } 5573 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5574 
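  /* the symbolic phase above created C as an MPIDENSE matrix with the proper layout; the numeric
     phase just called filled it by forming (B'*A')', see the comment before
     MatMatMultNumeric_MPIDense_MPIAIJ() */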
PetscFunctionReturn(0); 5575 } 5576 5577 #if defined(PETSC_HAVE_MUMPS) 5578 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5579 #endif 5580 #if defined(PETSC_HAVE_PASTIX) 5581 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5582 #endif 5583 #if defined(PETSC_HAVE_SUPERLU_DIST) 5584 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5585 #endif 5586 #if defined(PETSC_HAVE_CLIQUE) 5587 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5588 #endif 5589 5590 /*MC 5591 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5592 5593 Options Database Keys: 5594 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5595 5596 Level: beginner 5597 5598 .seealso: MatCreateAIJ() 5599 M*/ 5600 5601 #undef __FUNCT__ 5602 #define __FUNCT__ "MatCreate_MPIAIJ" 5603 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5604 { 5605 Mat_MPIAIJ *b; 5606 PetscErrorCode ierr; 5607 PetscMPIInt size; 5608 5609 PetscFunctionBegin; 5610 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5611 5612 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5613 B->data = (void*)b; 5614 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5615 B->assembled = PETSC_FALSE; 5616 B->insertmode = NOT_SET_VALUES; 5617 b->size = size; 5618 5619 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5620 5621 /* build cache for off array entries formed */ 5622 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5623 5624 b->donotstash = PETSC_FALSE; 5625 b->colmap = 0; 5626 b->garray = 0; 5627 b->roworiented = PETSC_TRUE; 5628 5629 /* stuff used for matrix vector multiply */ 5630 b->lvec = NULL; 5631 b->Mvctx = NULL; 5632 5633 /* stuff for MatGetRow() */ 5634 b->rowindices = 0; 5635 b->rowvalues = 0; 5636 b->getrowactive = PETSC_FALSE; 5637 5638 /* flexible pointer used in CUSP/CUSPARSE classes */ 5639 b->spptr = NULL; 5640 5641 #if defined(PETSC_HAVE_MUMPS) 5642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C","MatGetFactor_aij_mumps",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5643 #endif 5644 #if defined(PETSC_HAVE_PASTIX) 5645 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C","MatGetFactor_mpiaij_pastix",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5646 #endif 5647 #if defined(PETSC_HAVE_SUPERLU_DIST) 5648 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C","MatGetFactor_mpiaij_superlu_dist",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5649 #endif 5650 #if defined(PETSC_HAVE_CLIQUE) 5651 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C","MatGetFactor_aij_clique",MatGetFactor_aij_clique);CHKERRQ(ierr); 5652 #endif 5653 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIAIJ",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5654 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIAIJ",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5655 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIAIJ",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5656 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_MPIAIJ",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5657 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C","MatMPIAIJSetPreallocation_MPIAIJ",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5658 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C","MatMPIAIJSetPreallocationCSR_MPIAIJ",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5659 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIAIJ",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5660 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C","MatConvert_MPIAIJ_MPIAIJPERM",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5661 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C","MatConvert_MPIAIJ_MPIAIJCRL",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5662 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C","MatConvert_MPIAIJ_MPISBAIJ",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C","MatMatMult_MPIDense_MPIAIJ",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C","MatMatMultSymbolic_MPIDense_MPIAIJ",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C","MatMatMultNumeric_MPIDense_MPIAIJ",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5666 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5667 PetscFunctionReturn(0); 5668 } 5669 5670 #undef __FUNCT__ 5671 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5672 /*@ 5673 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5674 and "off-diagonal" part of the matrix in CSR format. 5675 5676 Collective on MPI_Comm 5677 5678 Input Parameters: 5679 + comm - MPI communicator 5680 . m - number of local rows (Cannot be PETSC_DECIDE) 5681 . n - This value should be the same as the local size used in creating the 5682 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5683 calculated if N is given) For square matrices n is almost always m. 5684 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5685 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5686 . i - row indices for "diagonal" portion of matrix 5687 . j - column indices 5688 . a - matrix values 5689 . oi - row indices for "off-diagonal" portion of matrix 5690 . oj - column indices 5691 - oa - matrix values 5692 5693 Output Parameter: 5694 . mat - the matrix 5695 5696 Level: advanced 5697 5698 Notes: 5699 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5700 must free the arrays once the matrix has been destroyed and not before. 5701 5702 The i and j indices are 0 based 5703 5704 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5705 5706 This sets local rows and cannot be used to set off-processor values. 5707 5708 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5709 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5710 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 5711 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5712 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5713 communication if it is known that only local entries will be set. 5714 5715 .keywords: matrix, aij, compressed row, sparse, parallel 5716 5717 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5718 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5719 @*/ 5720 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5721 { 5722 PetscErrorCode ierr; 5723 Mat_MPIAIJ *maij; 5724 5725 PetscFunctionBegin; 5726 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5727 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5728 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5729 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5730 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5731 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5732 maij = (Mat_MPIAIJ*) (*mat)->data; 5733 5734 (*mat)->preallocated = PETSC_TRUE; 5735 5736 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5737 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5738 5739 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5740 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5741 5742 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5743 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5744 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5745 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5746 5747 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5748 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5749 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5750 PetscFunctionReturn(0); 5751 } 5752 5753 /* 5754 Special version for direct calls from Fortran 5755 */ 5756 #include <petsc-private/fortranimpl.h> 5757 5758 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5759 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5760 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5761 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5762 #endif 5763 5764 /* Change these macros so can be used in void function */ 5765 #undef CHKERRQ 5766 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5767 #undef SETERRQ2 5768 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5769 #undef SETERRQ3 5770 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5771 #undef SETERRQ 5772 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5773 5774 #undef __FUNCT__ 5775 #define __FUNCT__ "matsetvaluesmpiaij_" 5776 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5777 { 5778 Mat mat = *mmat; 5779 PetscInt m = *mm, n = *mn; 5780 InsertMode addv = *maddv; 5781 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5782 PetscScalar value; 5783 PetscErrorCode ierr; 
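  /* this routine is essentially MatSetValues_MPIAIJ() inlined so that it can be called directly
     from Fortran without the usual stub overhead; since it returns void, the error-checking
     macros were redefined above to abort rather than return an error code */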

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
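/*
   Illustrative sketch only (kept out of the build with #if 0): one minimal way to drive
   MatCreateMPIAIJWithSplitArrays() on a single process. The helper name SplitArraysExample,
   the 2x2 matrix, and the static array names are hypothetical and not part of PETSc; they
   merely show how the split CSR arrays line up with the "diagonal" and (here empty)
   "off-diagonal" blocks, and that the arrays must outlive the matrix since they are not copied.
*/
#if 0
static PetscErrorCode SplitArraysExample(MPI_Comm comm,Mat *A)
{
  /* "diagonal" block of the 2x2 matrix [2 -1; -1 2] in 0-based CSR format */
  static PetscInt    di[] = {0,2,4};              /* row offsets: row 0 -> [0,2), row 1 -> [2,4) */
  static PetscInt    dj[] = {0,1,0,1};            /* local column indices                        */
  static PetscScalar da[] = {2.0,-1.0,-1.0,2.0};  /* values                                      */
  /* on one process there are no off-process couplings, so the "off-diagonal" block is empty */
  static PetscInt    oi[] = {0,0,0};              /* every row has zero off-diagonal entries     */
  static PetscInt    oj[] = {0};                  /* dummy, never read                           */
  static PetscScalar oa[] = {0.0};                /* dummy, never read                           */
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  /* the arrays are NOT copied; they must remain valid until the matrix is destroyed */
  ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,di,dj,da,oi,oj,oa,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif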