1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 6 /*MC 7 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 8 9 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 10 and MATMPIAIJ otherwise. As a result, for single process communicators, 11 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 12 for communicators controlling multiple processes. It is recommended that you call both of 13 the above preallocation routines for simplicity. 14 15 Options Database Keys: 16 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 17 18 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 19 enough exist. 20 21 Level: beginner 22 23 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 24 M*/ 25 26 /*MC 27 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 28 29 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 30 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 31 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 32 for communicators controlling multiple processes. It is recommended that you call both of 33 the above preallocation routines for simplicity. 34 35 Options Database Keys: 36 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 37 38 Level: beginner 39 40 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 41 M*/ 42 43 #undef __FUNCT__ 44 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 45 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 46 { 47 PetscErrorCode ierr; 48 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 49 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 50 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 51 const PetscInt *ia,*ib; 52 const MatScalar *aa,*bb; 53 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 54 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 55 56 PetscFunctionBegin; 57 *keptrows = 0; 58 ia = a->i; 59 ib = b->i; 60 for (i=0; i<m; i++) { 61 na = ia[i+1] - ia[i]; 62 nb = ib[i+1] - ib[i]; 63 if (!na && !nb) { 64 cnt++; 65 goto ok1; 66 } 67 aa = a->a + ia[i]; 68 for (j=0; j<na; j++) { 69 if (aa[j] != 0.0) goto ok1; 70 } 71 bb = b->a + ib[i]; 72 for (j=0; j <nb; j++) { 73 if (bb[j] != 0.0) goto ok1; 74 } 75 cnt++; 76 ok1:; 77 } 78 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 79 if (!n0rows) PetscFunctionReturn(0); 80 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 81 cnt = 0; 82 for (i=0; i<m; i++) { 83 na = ia[i+1] - ia[i]; 84 nb = ib[i+1] - ib[i]; 85 if (!na && !nb) continue; 86 aa = a->a + ia[i]; 87 for (j=0; j<na;j++) { 88 if (aa[j] != 0.0) { 89 rows[cnt++] = rstart + i; 90 goto ok2; 91 } 92 } 93 bb = b->a + ib[i]; 94 for (j=0; j<nb; j++) { 95 if (bb[j] != 0.0) { 96 rows[cnt++] = rstart + i; 97 goto ok2; 98 } 99 } 100 ok2:; 101 } 102 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 103 PetscFunctionReturn(0); 104 } 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 108 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 109 { 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 111 PetscErrorCode ierr; 112 PetscInt i,rstart,nrows,*rows; 113 114 PetscFunctionBegin; 115 *zrows = NULL; 116 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 117 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 118 for (i=0; i<nrows; i++) rows[i] += rstart; 119 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 125 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 126 { 127 PetscErrorCode ierr; 128 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 129 PetscInt i,n,*garray = aij->garray; 130 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 131 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 132 PetscReal *work; 133 134 PetscFunctionBegin; 135 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 136 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 137 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 138 if (type == NORM_2) { 139 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 140 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 141 } 142 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 143 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 144 } 145 } else if (type == NORM_1) { 146 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 147 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 148 } 149 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 150 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 151 } 152 } else if (type == NORM_INFINITY) { 153 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 154 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 155 } 156 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 157 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 158 } 159 160 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 161 if (type == NORM_INFINITY) { 162 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 163 } else { 164 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 165 } 166 ierr = PetscFree(work);CHKERRQ(ierr); 167 if (type == NORM_2) { 168 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatDistribute_MPIAIJ" 175 /* 176 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 177 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 CHKMEMQ; 197 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 199 if (!rank) { 200 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 201 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 202 } 203 if (reuse == MAT_INITIAL_MATRIX) { 204 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 205 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 206 if (!rank) { 207 bses[0] = gmat->rmap->bs; 208 bses[1] = gmat->cmap->bs; 209 } 210 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 211 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 212 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 213 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 214 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 215 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 216 217 rowners[0] = 0; 218 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 219 rstart = rowners[rank]; 220 rend = rowners[rank+1]; 221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 222 if (!rank) { 223 gmata = (Mat_SeqAIJ*) gmat->data; 224 /* send row lengths to all processors */ 225 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 226 for (i=1; i<size; i++) { 227 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 228 } 229 /* determine number diagonal and off-diagonal counts */ 230 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 231 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 232 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 233 jj = 0; 234 for (i=0; i<m; i++) { 235 for (j=0; j<dlens[i]; j++) { 236 if (gmata->j[jj] < rstart) ld[i]++; 237 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 238 jj++; 239 } 240 } 241 /* send column indices to other processes */ 242 for (i=1; i<size; i++) { 243 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 244 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 246 } 247 248 /* send numerical values to other processes */ 249 for (i=1; i<size; i++) { 250 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 251 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 252 } 253 gmataa = gmata->a; 254 gmataj = gmata->j; 255 256 } else { 257 /* receive row lengths */ 258 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 /* receive column indices */ 260 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 262 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 263 /* determine number diagonal and off-diagonal counts */ 264 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 266 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 267 jj = 0; 268 for (i=0; i<m; i++) { 269 for (j=0; j<dlens[i]; j++) { 270 if (gmataj[jj] < rstart) ld[i]++; 271 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 272 jj++; 273 } 274 } 275 /* receive numerical values */ 276 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 277 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 278 } 279 /* set preallocation */ 280 for (i=0; i<m; i++) { 281 dlens[i] -= olens[i]; 282 } 283 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 284 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 285 286 for (i=0; i<m; i++) { 287 dlens[i] += olens[i]; 288 } 289 cnt = 0; 290 for (i=0; i<m; i++) { 291 row = rstart + i; 292 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 293 cnt += dlens[i]; 294 } 295 if (rank) { 296 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 297 } 298 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 299 ierr = PetscFree(rowners);CHKERRQ(ierr); 300 301 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 302 303 *inmat = mat; 304 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 307 mat = *inmat; 308 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 309 if (!rank) { 310 /* send numerical values to other processes */ 311 gmata = (Mat_SeqAIJ*) gmat->data; 312 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 313 gmataa = gmata->a; 314 for (i=1; i<size; i++) { 315 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 316 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 317 } 318 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 319 } else { 320 /* receive numerical values from process 0*/ 321 nz = Ad->nz + Ao->nz; 322 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 326 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 327 ad = Ad->a; 328 ao = Ao->a; 329 if (mat->rmap->n) { 330 i = 0; 331 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 for (i=1; i<mat->rmap->n; i++) { 335 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 336 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 337 } 338 i--; 339 if (mat->rmap->n) { 340 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 341 } 342 if (rank) { 343 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 344 } 345 } 346 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 CHKMEMQ; 349 PetscFunctionReturn(0); 350 } 351 352 /* 353 Local utility routine that creates a mapping from the global column 354 number to the local number in the off-diagonal part of the local 355 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 356 a slightly higher hash table cost; without it it is not scalable (each processor 357 has an order N integer array but is fast to acess. 358 */ 359 #undef __FUNCT__ 360 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 361 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 362 { 363 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 364 PetscErrorCode ierr; 365 PetscInt n = aij->B->cmap->n,i; 366 367 PetscFunctionBegin; 368 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 369 #if defined(PETSC_USE_CTABLE) 370 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 371 for (i=0; i<n; i++) { 372 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 373 } 374 #else 375 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 376 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 377 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 378 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 379 #endif 380 PetscFunctionReturn(0); 381 } 382 383 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 384 { \ 385 if (col <= lastcol1) low1 = 0; \ 386 else high1 = nrow1; \ 387 lastcol1 = col;\ 388 while (high1-low1 > 5) { \ 389 t = (low1+high1)/2; \ 390 if (rp1[t] > col) high1 = t; \ 391 else low1 = t; \ 392 } \ 393 for (_i=low1; _i<high1; _i++) { \ 394 if (rp1[_i] > col) break; \ 395 if (rp1[_i] == col) { \ 396 if (addv == ADD_VALUES) ap1[_i] += value; \ 397 else ap1[_i] = value; \ 398 goto a_noinsert; \ 399 } \ 400 } \ 401 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 402 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 403 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 404 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 405 N = nrow1++ - 1; a->nz++; high1++; \ 406 /* shift up all the later entries in this row */ \ 407 for (ii=N; ii>=_i; ii--) { \ 408 rp1[ii+1] = rp1[ii]; \ 409 ap1[ii+1] = ap1[ii]; \ 410 } \ 411 rp1[_i] = col; \ 412 ap1[_i] = value; \ 413 a_noinsert: ; \ 414 ailen[row] = nrow1; \ 415 } 416 417 418 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 419 { \ 420 if (col <= lastcol2) low2 = 0; \ 421 else high2 = nrow2; \ 422 lastcol2 = col; \ 423 while (high2-low2 > 5) { \ 424 t = (low2+high2)/2; \ 425 if (rp2[t] > col) high2 = t; \ 426 else low2 = t; \ 427 } \ 428 for (_i=low2; _i<high2; _i++) { \ 429 if (rp2[_i] > col) break; \ 430 if (rp2[_i] == col) { \ 431 if (addv == ADD_VALUES) ap2[_i] += value; \ 432 else ap2[_i] = value; \ 433 goto b_noinsert; \ 434 } \ 435 } \ 436 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 437 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 438 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 439 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 440 N = nrow2++ - 1; b->nz++; high2++; \ 441 /* shift up all the later entries in this row */ \ 442 for (ii=N; ii>=_i; ii--) { \ 443 rp2[ii+1] = rp2[ii]; \ 444 ap2[ii+1] = ap2[ii]; \ 445 } \ 446 rp2[_i] = col; \ 447 ap2[_i] = value; \ 448 b_noinsert: ; \ 449 bilen[row] = nrow2; \ 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 454 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 455 { 456 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 457 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 458 PetscErrorCode ierr; 459 PetscInt l,*garray = mat->garray,diag; 460 461 PetscFunctionBegin; 462 /* code only works for square matrices A */ 463 464 /* find size of row to the left of the diagonal part */ 465 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 466 row = row - diag; 467 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 468 if (garray[b->j[b->i[row]+l]] > diag) break; 469 } 470 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 471 472 /* diagonal part */ 473 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 474 475 /* right of diagonal part */ 476 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 477 PetscFunctionReturn(0); 478 } 479 480 #undef __FUNCT__ 481 #define __FUNCT__ "MatSetValues_MPIAIJ" 482 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 483 { 484 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 485 PetscScalar value; 486 PetscErrorCode ierr; 487 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 488 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 489 PetscBool roworiented = aij->roworiented; 490 491 /* Some Variables required in the macro */ 492 Mat A = aij->A; 493 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 494 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 495 MatScalar *aa = a->a; 496 PetscBool ignorezeroentries = a->ignorezeroentries; 497 Mat B = aij->B; 498 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 499 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 500 MatScalar *ba = b->a; 501 502 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 503 PetscInt nonew; 504 MatScalar *ap1,*ap2; 505 506 PetscFunctionBegin; 507 if (v) PetscValidScalarPointer(v,6); 508 for (i=0; i<m; i++) { 509 if (im[i] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 512 #endif 513 if (im[i] >= rstart && im[i] < rend) { 514 row = im[i] - rstart; 515 lastcol1 = -1; 516 rp1 = aj + ai[row]; 517 ap1 = aa + ai[row]; 518 rmax1 = aimax[row]; 519 nrow1 = ailen[row]; 520 low1 = 0; 521 high1 = nrow1; 522 lastcol2 = -1; 523 rp2 = bj + bi[row]; 524 ap2 = ba + bi[row]; 525 rmax2 = bimax[row]; 526 nrow2 = bilen[row]; 527 low2 = 0; 528 high2 = nrow2; 529 530 for (j=0; j<n; j++) { 531 if (v) { 532 if (roworiented) value = v[i*n+j]; 533 else value = v[i+j*m]; 534 } else value = 0.0; 535 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 536 if (in[j] >= cstart && in[j] < cend) { 537 col = in[j] - cstart; 538 nonew = a->nonew; 539 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 540 } else if (in[j] < 0) continue; 541 #if defined(PETSC_USE_DEBUG) 542 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 543 #endif 544 else { 545 if (mat->was_assembled) { 546 if (!aij->colmap) { 547 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 548 } 549 #if defined(PETSC_USE_CTABLE) 550 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 551 col--; 552 #else 553 col = aij->colmap[in[j]] - 1; 554 #endif 555 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 556 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 557 col = in[j]; 558 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 559 B = aij->B; 560 b = (Mat_SeqAIJ*)B->data; 561 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 562 rp2 = bj + bi[row]; 563 ap2 = ba + bi[row]; 564 rmax2 = bimax[row]; 565 nrow2 = bilen[row]; 566 low2 = 0; 567 high2 = nrow2; 568 bm = aij->B->rmap->n; 569 ba = b->a; 570 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 571 } else col = in[j]; 572 nonew = b->nonew; 573 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 574 } 575 } 576 } else { 577 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 578 if (!aij->donotstash) { 579 mat->assembled = PETSC_FALSE; 580 if (roworiented) { 581 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 582 } else { 583 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 584 } 585 } 586 } 587 } 588 PetscFunctionReturn(0); 589 } 590 591 #undef __FUNCT__ 592 #define __FUNCT__ "MatGetValues_MPIAIJ" 593 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 594 { 595 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 596 PetscErrorCode ierr; 597 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 598 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 599 600 PetscFunctionBegin; 601 for (i=0; i<m; i++) { 602 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 603 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 604 if (idxm[i] >= rstart && idxm[i] < rend) { 605 row = idxm[i] - rstart; 606 for (j=0; j<n; j++) { 607 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 608 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 609 if (idxn[j] >= cstart && idxn[j] < cend) { 610 col = idxn[j] - cstart; 611 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 612 } else { 613 if (!aij->colmap) { 614 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 615 } 616 #if defined(PETSC_USE_CTABLE) 617 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 618 col--; 619 #else 620 col = aij->colmap[idxn[j]] - 1; 621 #endif 622 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 623 else { 624 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 625 } 626 } 627 } 628 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 629 } 630 PetscFunctionReturn(0); 631 } 632 633 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 634 635 #undef __FUNCT__ 636 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 637 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 638 { 639 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 640 PetscErrorCode ierr; 641 PetscInt nstash,reallocs; 642 InsertMode addv; 643 644 PetscFunctionBegin; 645 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 646 647 /* make sure all processors are either in INSERTMODE or ADDMODE */ 648 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 649 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 650 mat->insertmode = addv; /* in case this processor had no cache */ 651 652 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 653 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 654 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 655 PetscFunctionReturn(0); 656 } 657 658 #undef __FUNCT__ 659 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 660 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 661 { 662 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 663 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 664 PetscErrorCode ierr; 665 PetscMPIInt n; 666 PetscInt i,j,rstart,ncols,flg; 667 PetscInt *row,*col; 668 PetscBool other_disassembled; 669 PetscScalar *val; 670 InsertMode addv = mat->insertmode; 671 672 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 673 674 PetscFunctionBegin; 675 if (!aij->donotstash && !mat->nooffprocentries) { 676 while (1) { 677 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 678 if (!flg) break; 679 680 for (i=0; i<n; ) { 681 /* Now identify the consecutive vals belonging to the same row */ 682 for (j=i,rstart=row[j]; j<n; j++) { 683 if (row[j] != rstart) break; 684 } 685 if (j < n) ncols = j-i; 686 else ncols = n-i; 687 /* Now assemble all these values with a single function call */ 688 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 689 690 i = j; 691 } 692 } 693 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 694 } 695 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 696 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 697 698 /* determine if any processor has disassembled, if so we must 699 also disassemble ourselfs, in order that we may reassemble. */ 700 /* 701 if nonzero structure of submatrix B cannot change then we know that 702 no processor disassembled thus we can skip this stuff 703 */ 704 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 705 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 706 if (mat->was_assembled && !other_disassembled) { 707 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 708 } 709 } 710 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 711 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 712 } 713 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 714 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 715 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 716 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 717 718 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 719 720 aij->rowvalues = 0; 721 722 /* used by MatAXPY() */ 723 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 724 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 725 726 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 727 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 733 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 734 { 735 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 740 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatZeroRows_MPIAIJ" 746 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 747 { 748 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 749 PetscErrorCode ierr; 750 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 751 PetscInt i,*owners = A->rmap->range; 752 PetscInt *nprocs,j,idx,nsends,row; 753 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 754 PetscInt *rvalues,count,base,slen,*source; 755 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 756 MPI_Comm comm; 757 MPI_Request *send_waits,*recv_waits; 758 MPI_Status recv_status,*send_status; 759 const PetscScalar *xx; 760 PetscScalar *bb; 761 #if defined(PETSC_DEBUG) 762 PetscBool found = PETSC_FALSE; 763 #endif 764 765 PetscFunctionBegin; 766 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 767 /* first count number of contributors to each processor */ 768 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 769 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 770 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 771 j = 0; 772 for (i=0; i<N; i++) { 773 if (lastidx > (idx = rows[i])) j = 0; 774 lastidx = idx; 775 for (; j<size; j++) { 776 if (idx >= owners[j] && idx < owners[j+1]) { 777 nprocs[2*j]++; 778 nprocs[2*j+1] = 1; 779 owner[i] = j; 780 #if defined(PETSC_DEBUG) 781 found = PETSC_TRUE; 782 #endif 783 break; 784 } 785 } 786 #if defined(PETSC_DEBUG) 787 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 788 found = PETSC_FALSE; 789 #endif 790 } 791 nsends = 0; 792 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 793 794 if (A->nooffproczerorows) { 795 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 796 nrecvs = nsends; 797 nmax = N; 798 } else { 799 /* inform other processors of number of messages and max length*/ 800 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 801 } 802 803 /* post receives: */ 804 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 805 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 806 for (i=0; i<nrecvs; i++) { 807 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 808 } 809 810 /* do sends: 811 1) starts[i] gives the starting index in svalues for stuff going to 812 the ith processor 813 */ 814 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 815 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 816 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 817 818 starts[0] = 0; 819 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 820 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 821 822 starts[0] = 0; 823 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 824 count = 0; 825 for (i=0; i<size; i++) { 826 if (nprocs[2*i+1]) { 827 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 828 } 829 } 830 ierr = PetscFree(starts);CHKERRQ(ierr); 831 832 base = owners[rank]; 833 834 /* wait on receives */ 835 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 836 count = nrecvs; slen = 0; 837 while (count) { 838 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 839 /* unpack receives into our local space */ 840 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 841 842 source[imdex] = recv_status.MPI_SOURCE; 843 lens[imdex] = n; 844 slen += n; 845 count--; 846 } 847 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 848 849 /* move the data into the send scatter */ 850 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 851 count = 0; 852 for (i=0; i<nrecvs; i++) { 853 values = rvalues + i*nmax; 854 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 855 } 856 ierr = PetscFree(rvalues);CHKERRQ(ierr); 857 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 858 ierr = PetscFree(owner);CHKERRQ(ierr); 859 ierr = PetscFree(nprocs);CHKERRQ(ierr); 860 861 /* fix right hand side if needed */ 862 if (x && b) { 863 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 864 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 865 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 866 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 867 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 868 } 869 /* 870 Zero the required rows. If the "diagonal block" of the matrix 871 is square and the user wishes to set the diagonal we use separate 872 code so that MatSetValues() is not called for each diagonal allocating 873 new memory, thus calling lots of mallocs and slowing things down. 874 875 */ 876 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 877 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 878 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 879 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 880 } else if (diag != 0.0) { 881 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 882 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 883 for (i = 0; i < slen; i++) { 884 row = lrows[i] + rstart; 885 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 886 } 887 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 889 } else { 890 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 891 } 892 ierr = PetscFree(lrows);CHKERRQ(ierr); 893 894 /* wait on sends */ 895 if (nsends) { 896 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 897 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 898 ierr = PetscFree(send_status);CHKERRQ(ierr); 899 } 900 ierr = PetscFree(send_waits);CHKERRQ(ierr); 901 ierr = PetscFree(svalues);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } 904 905 #undef __FUNCT__ 906 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 907 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 908 { 909 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 910 PetscErrorCode ierr; 911 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 912 PetscInt i,*owners = A->rmap->range; 913 PetscInt *nprocs,j,idx,nsends; 914 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 915 PetscInt *rvalues,count,base,slen,*source; 916 PetscInt *lens,*lrows,*values,m; 917 MPI_Comm comm; 918 MPI_Request *send_waits,*recv_waits; 919 MPI_Status recv_status,*send_status; 920 const PetscScalar *xx; 921 PetscScalar *bb,*mask; 922 Vec xmask,lmask; 923 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 924 const PetscInt *aj, *ii,*ridx; 925 PetscScalar *aa; 926 #if defined(PETSC_DEBUG) 927 PetscBool found = PETSC_FALSE; 928 #endif 929 930 PetscFunctionBegin; 931 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 932 /* first count number of contributors to each processor */ 933 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 934 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 935 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 936 j = 0; 937 for (i=0; i<N; i++) { 938 if (lastidx > (idx = rows[i])) j = 0; 939 lastidx = idx; 940 for (; j<size; j++) { 941 if (idx >= owners[j] && idx < owners[j+1]) { 942 nprocs[2*j]++; 943 nprocs[2*j+1] = 1; 944 owner[i] = j; 945 #if defined(PETSC_DEBUG) 946 found = PETSC_TRUE; 947 #endif 948 break; 949 } 950 } 951 #if defined(PETSC_DEBUG) 952 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 953 found = PETSC_FALSE; 954 #endif 955 } 956 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 957 958 /* inform other processors of number of messages and max length*/ 959 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 960 961 /* post receives: */ 962 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 963 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 964 for (i=0; i<nrecvs; i++) { 965 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 966 } 967 968 /* do sends: 969 1) starts[i] gives the starting index in svalues for stuff going to 970 the ith processor 971 */ 972 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 973 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 974 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 975 976 starts[0] = 0; 977 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 978 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 979 980 starts[0] = 0; 981 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 982 count = 0; 983 for (i=0; i<size; i++) { 984 if (nprocs[2*i+1]) { 985 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 986 } 987 } 988 ierr = PetscFree(starts);CHKERRQ(ierr); 989 990 base = owners[rank]; 991 992 /* wait on receives */ 993 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 994 count = nrecvs; slen = 0; 995 while (count) { 996 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 997 /* unpack receives into our local space */ 998 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 999 1000 source[imdex] = recv_status.MPI_SOURCE; 1001 lens[imdex] = n; 1002 slen += n; 1003 count--; 1004 } 1005 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1006 1007 /* move the data into the send scatter */ 1008 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1009 count = 0; 1010 for (i=0; i<nrecvs; i++) { 1011 values = rvalues + i*nmax; 1012 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1013 } 1014 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1015 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1016 ierr = PetscFree(owner);CHKERRQ(ierr); 1017 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1018 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1019 1020 /* zero diagonal part of matrix */ 1021 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1022 1023 /* handle off diagonal part of matrix */ 1024 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1025 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1026 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1027 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1028 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1029 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1031 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1032 if (x) { 1033 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1035 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1036 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1037 } 1038 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1039 1040 /* remove zeroed rows of off diagonal matrix */ 1041 ii = aij->i; 1042 for (i=0; i<slen; i++) { 1043 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1044 } 1045 1046 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1047 if (aij->compressedrow.use) { 1048 m = aij->compressedrow.nrows; 1049 ii = aij->compressedrow.i; 1050 ridx = aij->compressedrow.rindex; 1051 for (i=0; i<m; i++) { 1052 n = ii[i+1] - ii[i]; 1053 aj = aij->j + ii[i]; 1054 aa = aij->a + ii[i]; 1055 1056 for (j=0; j<n; j++) { 1057 if (PetscAbsScalar(mask[*aj])) { 1058 if (b) bb[*ridx] -= *aa*xx[*aj]; 1059 *aa = 0.0; 1060 } 1061 aa++; 1062 aj++; 1063 } 1064 ridx++; 1065 } 1066 } else { /* do not use compressed row format */ 1067 m = l->B->rmap->n; 1068 for (i=0; i<m; i++) { 1069 n = ii[i+1] - ii[i]; 1070 aj = aij->j + ii[i]; 1071 aa = aij->a + ii[i]; 1072 for (j=0; j<n; j++) { 1073 if (PetscAbsScalar(mask[*aj])) { 1074 if (b) bb[i] -= *aa*xx[*aj]; 1075 *aa = 0.0; 1076 } 1077 aa++; 1078 aj++; 1079 } 1080 } 1081 } 1082 if (x) { 1083 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1084 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1085 } 1086 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1087 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1088 ierr = PetscFree(lrows);CHKERRQ(ierr); 1089 1090 /* wait on sends */ 1091 if (nsends) { 1092 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1093 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1094 ierr = PetscFree(send_status);CHKERRQ(ierr); 1095 } 1096 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1097 ierr = PetscFree(svalues);CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatMult_MPIAIJ" 1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1104 { 1105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 PetscInt nt; 1108 1109 PetscFunctionBegin; 1110 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1111 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1112 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1113 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1114 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1115 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1121 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1122 { 1123 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 #undef __FUNCT__ 1132 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1133 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1134 { 1135 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1136 PetscErrorCode ierr; 1137 1138 PetscFunctionBegin; 1139 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1141 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1148 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1149 { 1150 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1151 PetscErrorCode ierr; 1152 PetscBool merged; 1153 1154 PetscFunctionBegin; 1155 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1156 /* do nondiagonal part */ 1157 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1158 if (!merged) { 1159 /* send it on its way */ 1160 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1161 /* do local part */ 1162 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1163 /* receive remote parts: note this assumes the values are not actually */ 1164 /* added in yy until the next line, */ 1165 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1166 } else { 1167 /* do local part */ 1168 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1169 /* send it on its way */ 1170 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1171 /* values actually were received in the Begin() but we need to call this nop */ 1172 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1173 } 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1179 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1180 { 1181 MPI_Comm comm; 1182 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1183 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1184 IS Me,Notme; 1185 PetscErrorCode ierr; 1186 PetscInt M,N,first,last,*notme,i; 1187 PetscMPIInt size; 1188 1189 PetscFunctionBegin; 1190 /* Easy test: symmetric diagonal block */ 1191 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1192 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1193 if (!*f) PetscFunctionReturn(0); 1194 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1195 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1196 if (size == 1) PetscFunctionReturn(0); 1197 1198 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1199 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1200 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1201 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1202 for (i=0; i<first; i++) notme[i] = i; 1203 for (i=last; i<M; i++) notme[i-last+first] = i; 1204 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1205 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1206 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1207 Aoff = Aoffs[0]; 1208 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1209 Boff = Boffs[0]; 1210 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1211 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1212 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1213 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1214 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1215 ierr = PetscFree(notme);CHKERRQ(ierr); 1216 PetscFunctionReturn(0); 1217 } 1218 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1221 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1222 { 1223 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1224 PetscErrorCode ierr; 1225 1226 PetscFunctionBegin; 1227 /* do nondiagonal part */ 1228 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1229 /* send it on its way */ 1230 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1231 /* do local part */ 1232 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1233 /* receive remote parts */ 1234 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 /* 1239 This only works correctly for square matrices where the subblock A->A is the 1240 diagonal block 1241 */ 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1244 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1245 { 1246 PetscErrorCode ierr; 1247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1248 1249 PetscFunctionBegin; 1250 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1251 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1252 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "MatScale_MPIAIJ" 1258 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1259 { 1260 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1261 PetscErrorCode ierr; 1262 1263 PetscFunctionBegin; 1264 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1265 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatDestroy_MPIAIJ" 1271 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1272 { 1273 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1274 PetscErrorCode ierr; 1275 1276 PetscFunctionBegin; 1277 #if defined(PETSC_USE_LOG) 1278 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1279 #endif 1280 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1281 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1282 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1283 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1284 #if defined(PETSC_USE_CTABLE) 1285 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1286 #else 1287 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1288 #endif 1289 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1290 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1291 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1292 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1293 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1294 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1295 1296 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",NULL);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1311 { 1312 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1313 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1314 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1315 PetscErrorCode ierr; 1316 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1317 int fd; 1318 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1319 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1320 PetscScalar *column_values; 1321 PetscInt message_count,flowcontrolcount; 1322 FILE *file; 1323 1324 PetscFunctionBegin; 1325 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1326 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1327 nz = A->nz + B->nz; 1328 if (!rank) { 1329 header[0] = MAT_FILE_CLASSID; 1330 header[1] = mat->rmap->N; 1331 header[2] = mat->cmap->N; 1332 1333 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1334 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1335 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1336 /* get largest number of rows any processor has */ 1337 rlen = mat->rmap->n; 1338 range = mat->rmap->range; 1339 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1340 } else { 1341 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1342 rlen = mat->rmap->n; 1343 } 1344 1345 /* load up the local row counts */ 1346 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1347 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1348 1349 /* store the row lengths to the file */ 1350 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1351 if (!rank) { 1352 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1353 for (i=1; i<size; i++) { 1354 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1355 rlen = range[i+1] - range[i]; 1356 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1357 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1358 } 1359 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1360 } else { 1361 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1362 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1363 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1364 } 1365 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1366 1367 /* load up the local column indices */ 1368 nzmax = nz; /* th processor needs space a largest processor needs */ 1369 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1370 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1371 cnt = 0; 1372 for (i=0; i<mat->rmap->n; i++) { 1373 for (j=B->i[i]; j<B->i[i+1]; j++) { 1374 if ((col = garray[B->j[j]]) > cstart) break; 1375 column_indices[cnt++] = col; 1376 } 1377 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1378 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1379 } 1380 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1381 1382 /* store the column indices to the file */ 1383 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1384 if (!rank) { 1385 MPI_Status status; 1386 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1387 for (i=1; i<size; i++) { 1388 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1389 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1390 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1391 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1392 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1393 } 1394 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1395 } else { 1396 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1397 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1398 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1399 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1400 } 1401 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1402 1403 /* load up the local column values */ 1404 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1405 cnt = 0; 1406 for (i=0; i<mat->rmap->n; i++) { 1407 for (j=B->i[i]; j<B->i[i+1]; j++) { 1408 if (garray[B->j[j]] > cstart) break; 1409 column_values[cnt++] = B->a[j]; 1410 } 1411 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1412 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1413 } 1414 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1415 1416 /* store the column values to the file */ 1417 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1418 if (!rank) { 1419 MPI_Status status; 1420 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1421 for (i=1; i<size; i++) { 1422 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1423 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1424 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1425 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1426 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1427 } 1428 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1429 } else { 1430 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1431 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1432 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1433 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1434 } 1435 ierr = PetscFree(column_values);CHKERRQ(ierr); 1436 1437 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1438 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1439 PetscFunctionReturn(0); 1440 } 1441 1442 #include <petscdraw.h> 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1445 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1446 { 1447 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1448 PetscErrorCode ierr; 1449 PetscMPIInt rank = aij->rank,size = aij->size; 1450 PetscBool isdraw,iascii,isbinary; 1451 PetscViewer sviewer; 1452 PetscViewerFormat format; 1453 1454 PetscFunctionBegin; 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1457 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1458 if (iascii) { 1459 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1460 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1461 MatInfo info; 1462 PetscBool inodes; 1463 1464 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1465 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1466 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1468 if (!inodes) { 1469 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1470 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1471 } else { 1472 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1473 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1474 } 1475 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1476 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1477 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1478 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1479 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1481 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1482 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1483 PetscFunctionReturn(0); 1484 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1485 PetscInt inodecount,inodelimit,*inodes; 1486 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1487 if (inodes) { 1488 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1489 } else { 1490 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1491 } 1492 PetscFunctionReturn(0); 1493 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1494 PetscFunctionReturn(0); 1495 } 1496 } else if (isbinary) { 1497 if (size == 1) { 1498 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1499 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1500 } else { 1501 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1502 } 1503 PetscFunctionReturn(0); 1504 } else if (isdraw) { 1505 PetscDraw draw; 1506 PetscBool isnull; 1507 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1508 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1509 } 1510 1511 if (size == 1) { 1512 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1513 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1514 } else { 1515 /* assemble the entire matrix onto first processor. */ 1516 Mat A; 1517 Mat_SeqAIJ *Aloc; 1518 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1519 MatScalar *a; 1520 1521 if (mat->rmap->N > 1024) { 1522 PetscBool flg = PETSC_FALSE; 1523 1524 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1525 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1526 } 1527 1528 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1529 if (!rank) { 1530 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1531 } else { 1532 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1533 } 1534 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1535 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1536 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1537 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1538 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1539 1540 /* copy over the A part */ 1541 Aloc = (Mat_SeqAIJ*)aij->A->data; 1542 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1543 row = mat->rmap->rstart; 1544 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1545 for (i=0; i<m; i++) { 1546 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1547 row++; 1548 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1549 } 1550 aj = Aloc->j; 1551 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1552 1553 /* copy over the B part */ 1554 Aloc = (Mat_SeqAIJ*)aij->B->data; 1555 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1556 row = mat->rmap->rstart; 1557 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1558 ct = cols; 1559 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1560 for (i=0; i<m; i++) { 1561 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1562 row++; 1563 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1564 } 1565 ierr = PetscFree(ct);CHKERRQ(ierr); 1566 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1568 /* 1569 Everyone has to call to draw the matrix since the graphics waits are 1570 synchronized across all processors that share the PetscDraw object 1571 */ 1572 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1573 if (!rank) { 1574 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1575 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1576 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1577 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1578 } 1579 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1580 ierr = MatDestroy(&A);CHKERRQ(ierr); 1581 } 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatView_MPIAIJ" 1587 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1588 { 1589 PetscErrorCode ierr; 1590 PetscBool iascii,isdraw,issocket,isbinary; 1591 1592 PetscFunctionBegin; 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1596 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1597 if (iascii || isdraw || isbinary || issocket) { 1598 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1599 } 1600 PetscFunctionReturn(0); 1601 } 1602 1603 #undef __FUNCT__ 1604 #define __FUNCT__ "MatSOR_MPIAIJ" 1605 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1606 { 1607 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1608 PetscErrorCode ierr; 1609 Vec bb1 = 0; 1610 PetscBool hasop; 1611 1612 PetscFunctionBegin; 1613 if (flag == SOR_APPLY_UPPER) { 1614 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1615 PetscFunctionReturn(0); 1616 } 1617 1618 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1619 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1620 } 1621 1622 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1623 if (flag & SOR_ZERO_INITIAL_GUESS) { 1624 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1625 its--; 1626 } 1627 1628 while (its--) { 1629 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1631 1632 /* update rhs: bb1 = bb - B*x */ 1633 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1634 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1635 1636 /* local sweep */ 1637 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1638 } 1639 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1640 if (flag & SOR_ZERO_INITIAL_GUESS) { 1641 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1642 its--; 1643 } 1644 while (its--) { 1645 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 1648 /* update rhs: bb1 = bb - B*x */ 1649 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1650 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1651 1652 /* local sweep */ 1653 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1654 } 1655 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1656 if (flag & SOR_ZERO_INITIAL_GUESS) { 1657 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1658 its--; 1659 } 1660 while (its--) { 1661 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 1664 /* update rhs: bb1 = bb - B*x */ 1665 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1666 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1667 1668 /* local sweep */ 1669 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1670 } 1671 } else if (flag & SOR_EISENSTAT) { 1672 Vec xx1; 1673 1674 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1675 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1676 1677 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1679 if (!mat->diag) { 1680 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1681 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1682 } 1683 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1684 if (hasop) { 1685 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1686 } else { 1687 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1688 } 1689 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1690 1691 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1692 1693 /* local sweep */ 1694 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1695 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1696 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1697 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1698 1699 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1700 PetscFunctionReturn(0); 1701 } 1702 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatPermute_MPIAIJ" 1705 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1706 { 1707 Mat aA,aB,Aperm; 1708 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1709 PetscScalar *aa,*ba; 1710 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1711 PetscSF rowsf,sf; 1712 IS parcolp = NULL; 1713 PetscBool done; 1714 PetscErrorCode ierr; 1715 1716 PetscFunctionBegin; 1717 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1718 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1719 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1720 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1721 1722 /* Invert row permutation to find out where my rows should go */ 1723 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1724 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1725 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1726 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1727 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1728 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1729 1730 /* Invert column permutation to find out where my columns should go */ 1731 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1732 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1733 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1734 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1735 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1737 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1738 1739 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1740 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1741 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1742 1743 /* Find out where my gcols should go */ 1744 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1745 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1746 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1747 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1748 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1749 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1751 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1752 1753 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1754 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1757 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1758 for (i=0; i<m; i++) { 1759 PetscInt row = rdest[i],rowner; 1760 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1761 for (j=ai[i]; j<ai[i+1]; j++) { 1762 PetscInt cowner,col = cdest[aj[j]]; 1763 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1764 if (rowner == cowner) dnnz[i]++; 1765 else onnz[i]++; 1766 } 1767 for (j=bi[i]; j<bi[i+1]; j++) { 1768 PetscInt cowner,col = gcdest[bj[j]]; 1769 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1770 if (rowner == cowner) dnnz[i]++; 1771 else onnz[i]++; 1772 } 1773 } 1774 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1776 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1780 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1781 1782 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1784 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1785 for (i=0; i<m; i++) { 1786 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1787 PetscInt rowlen; 1788 rowlen = ai[i+1] - ai[i]; 1789 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1790 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1791 rowlen = bi[i+1] - bi[i]; 1792 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1793 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1794 } 1795 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1798 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1800 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1801 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1802 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1803 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1804 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1805 *B = Aperm; 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1811 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1812 { 1813 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1814 Mat A = mat->A,B = mat->B; 1815 PetscErrorCode ierr; 1816 PetscReal isend[5],irecv[5]; 1817 1818 PetscFunctionBegin; 1819 info->block_size = 1.0; 1820 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1821 1822 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1823 isend[3] = info->memory; isend[4] = info->mallocs; 1824 1825 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1826 1827 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1828 isend[3] += info->memory; isend[4] += info->mallocs; 1829 if (flag == MAT_LOCAL) { 1830 info->nz_used = isend[0]; 1831 info->nz_allocated = isend[1]; 1832 info->nz_unneeded = isend[2]; 1833 info->memory = isend[3]; 1834 info->mallocs = isend[4]; 1835 } else if (flag == MAT_GLOBAL_MAX) { 1836 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1837 1838 info->nz_used = irecv[0]; 1839 info->nz_allocated = irecv[1]; 1840 info->nz_unneeded = irecv[2]; 1841 info->memory = irecv[3]; 1842 info->mallocs = irecv[4]; 1843 } else if (flag == MAT_GLOBAL_SUM) { 1844 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1845 1846 info->nz_used = irecv[0]; 1847 info->nz_allocated = irecv[1]; 1848 info->nz_unneeded = irecv[2]; 1849 info->memory = irecv[3]; 1850 info->mallocs = irecv[4]; 1851 } 1852 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1853 info->fill_ratio_needed = 0; 1854 info->factor_mallocs = 0; 1855 PetscFunctionReturn(0); 1856 } 1857 1858 #undef __FUNCT__ 1859 #define __FUNCT__ "MatSetOption_MPIAIJ" 1860 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1861 { 1862 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1863 PetscErrorCode ierr; 1864 1865 PetscFunctionBegin; 1866 switch (op) { 1867 case MAT_NEW_NONZERO_LOCATIONS: 1868 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1869 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1870 case MAT_KEEP_NONZERO_PATTERN: 1871 case MAT_NEW_NONZERO_LOCATION_ERR: 1872 case MAT_USE_INODES: 1873 case MAT_IGNORE_ZERO_ENTRIES: 1874 MatCheckPreallocated(A,1); 1875 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1876 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1877 break; 1878 case MAT_ROW_ORIENTED: 1879 a->roworiented = flg; 1880 1881 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1882 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1883 break; 1884 case MAT_NEW_DIAGONALS: 1885 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1886 break; 1887 case MAT_IGNORE_OFF_PROC_ENTRIES: 1888 a->donotstash = flg; 1889 break; 1890 case MAT_SPD: 1891 A->spd_set = PETSC_TRUE; 1892 A->spd = flg; 1893 if (flg) { 1894 A->symmetric = PETSC_TRUE; 1895 A->structurally_symmetric = PETSC_TRUE; 1896 A->symmetric_set = PETSC_TRUE; 1897 A->structurally_symmetric_set = PETSC_TRUE; 1898 } 1899 break; 1900 case MAT_SYMMETRIC: 1901 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1902 break; 1903 case MAT_STRUCTURALLY_SYMMETRIC: 1904 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1905 break; 1906 case MAT_HERMITIAN: 1907 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1908 break; 1909 case MAT_SYMMETRY_ETERNAL: 1910 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1911 break; 1912 default: 1913 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1914 } 1915 PetscFunctionReturn(0); 1916 } 1917 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatGetRow_MPIAIJ" 1920 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1921 { 1922 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1923 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1924 PetscErrorCode ierr; 1925 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1926 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1927 PetscInt *cmap,*idx_p; 1928 1929 PetscFunctionBegin; 1930 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1931 mat->getrowactive = PETSC_TRUE; 1932 1933 if (!mat->rowvalues && (idx || v)) { 1934 /* 1935 allocate enough space to hold information from the longest row. 1936 */ 1937 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1938 PetscInt max = 1,tmp; 1939 for (i=0; i<matin->rmap->n; i++) { 1940 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1941 if (max < tmp) max = tmp; 1942 } 1943 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1944 } 1945 1946 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1947 lrow = row - rstart; 1948 1949 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1950 if (!v) {pvA = 0; pvB = 0;} 1951 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1952 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1953 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1954 nztot = nzA + nzB; 1955 1956 cmap = mat->garray; 1957 if (v || idx) { 1958 if (nztot) { 1959 /* Sort by increasing column numbers, assuming A and B already sorted */ 1960 PetscInt imark = -1; 1961 if (v) { 1962 *v = v_p = mat->rowvalues; 1963 for (i=0; i<nzB; i++) { 1964 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1965 else break; 1966 } 1967 imark = i; 1968 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1969 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1970 } 1971 if (idx) { 1972 *idx = idx_p = mat->rowindices; 1973 if (imark > -1) { 1974 for (i=0; i<imark; i++) { 1975 idx_p[i] = cmap[cworkB[i]]; 1976 } 1977 } else { 1978 for (i=0; i<nzB; i++) { 1979 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1980 else break; 1981 } 1982 imark = i; 1983 } 1984 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1985 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1986 } 1987 } else { 1988 if (idx) *idx = 0; 1989 if (v) *v = 0; 1990 } 1991 } 1992 *nz = nztot; 1993 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1994 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 2000 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2001 { 2002 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2003 2004 PetscFunctionBegin; 2005 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2006 aij->getrowactive = PETSC_FALSE; 2007 PetscFunctionReturn(0); 2008 } 2009 2010 #undef __FUNCT__ 2011 #define __FUNCT__ "MatNorm_MPIAIJ" 2012 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2013 { 2014 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2015 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2016 PetscErrorCode ierr; 2017 PetscInt i,j,cstart = mat->cmap->rstart; 2018 PetscReal sum = 0.0; 2019 MatScalar *v; 2020 2021 PetscFunctionBegin; 2022 if (aij->size == 1) { 2023 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2024 } else { 2025 if (type == NORM_FROBENIUS) { 2026 v = amat->a; 2027 for (i=0; i<amat->nz; i++) { 2028 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2029 } 2030 v = bmat->a; 2031 for (i=0; i<bmat->nz; i++) { 2032 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2033 } 2034 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2035 *norm = PetscSqrtReal(*norm); 2036 } else if (type == NORM_1) { /* max column norm */ 2037 PetscReal *tmp,*tmp2; 2038 PetscInt *jj,*garray = aij->garray; 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2040 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2041 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2042 *norm = 0.0; 2043 v = amat->a; jj = amat->j; 2044 for (j=0; j<amat->nz; j++) { 2045 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2046 } 2047 v = bmat->a; jj = bmat->j; 2048 for (j=0; j<bmat->nz; j++) { 2049 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2050 } 2051 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2052 for (j=0; j<mat->cmap->N; j++) { 2053 if (tmp2[j] > *norm) *norm = tmp2[j]; 2054 } 2055 ierr = PetscFree(tmp);CHKERRQ(ierr); 2056 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2057 } else if (type == NORM_INFINITY) { /* max row norm */ 2058 PetscReal ntemp = 0.0; 2059 for (j=0; j<aij->A->rmap->n; j++) { 2060 v = amat->a + amat->i[j]; 2061 sum = 0.0; 2062 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2063 sum += PetscAbsScalar(*v); v++; 2064 } 2065 v = bmat->a + bmat->i[j]; 2066 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2067 sum += PetscAbsScalar(*v); v++; 2068 } 2069 if (sum > ntemp) ntemp = sum; 2070 } 2071 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2072 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2073 } 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatTranspose_MPIAIJ" 2079 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2080 { 2081 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2082 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2083 PetscErrorCode ierr; 2084 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2085 PetscInt cstart = A->cmap->rstart,ncol; 2086 Mat B; 2087 MatScalar *array; 2088 2089 PetscFunctionBegin; 2090 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2091 2092 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2093 ai = Aloc->i; aj = Aloc->j; 2094 bi = Bloc->i; bj = Bloc->j; 2095 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2096 PetscInt *d_nnz,*g_nnz,*o_nnz; 2097 PetscSFNode *oloc; 2098 PETSC_UNUSED PetscSF sf; 2099 2100 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2101 /* compute d_nnz for preallocation */ 2102 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2103 for (i=0; i<ai[ma]; i++) { 2104 d_nnz[aj[i]]++; 2105 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2106 } 2107 /* compute local off-diagonal contributions */ 2108 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2109 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2110 /* map those to global */ 2111 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2112 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2113 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2114 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2115 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2117 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2118 2119 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2120 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2121 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2122 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2123 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2124 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2125 } else { 2126 B = *matout; 2127 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2128 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2129 } 2130 2131 /* copy over the A part */ 2132 array = Aloc->a; 2133 row = A->rmap->rstart; 2134 for (i=0; i<ma; i++) { 2135 ncol = ai[i+1]-ai[i]; 2136 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2137 row++; 2138 array += ncol; aj += ncol; 2139 } 2140 aj = Aloc->j; 2141 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2142 2143 /* copy over the B part */ 2144 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2145 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2146 array = Bloc->a; 2147 row = A->rmap->rstart; 2148 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2149 cols_tmp = cols; 2150 for (i=0; i<mb; i++) { 2151 ncol = bi[i+1]-bi[i]; 2152 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2153 row++; 2154 array += ncol; cols_tmp += ncol; 2155 } 2156 ierr = PetscFree(cols);CHKERRQ(ierr); 2157 2158 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2160 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2161 *matout = B; 2162 } else { 2163 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2164 } 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2170 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2171 { 2172 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2173 Mat a = aij->A,b = aij->B; 2174 PetscErrorCode ierr; 2175 PetscInt s1,s2,s3; 2176 2177 PetscFunctionBegin; 2178 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2179 if (rr) { 2180 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2181 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2182 /* Overlap communication with computation. */ 2183 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2184 } 2185 if (ll) { 2186 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2187 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2188 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2189 } 2190 /* scale the diagonal block */ 2191 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2192 2193 if (rr) { 2194 /* Do a scatter end and then right scale the off-diagonal block */ 2195 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2196 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2197 } 2198 PetscFunctionReturn(0); 2199 } 2200 2201 #undef __FUNCT__ 2202 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2203 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2204 { 2205 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2206 PetscErrorCode ierr; 2207 2208 PetscFunctionBegin; 2209 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2210 PetscFunctionReturn(0); 2211 } 2212 2213 #undef __FUNCT__ 2214 #define __FUNCT__ "MatEqual_MPIAIJ" 2215 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2216 { 2217 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2218 Mat a,b,c,d; 2219 PetscBool flg; 2220 PetscErrorCode ierr; 2221 2222 PetscFunctionBegin; 2223 a = matA->A; b = matA->B; 2224 c = matB->A; d = matB->B; 2225 2226 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2227 if (flg) { 2228 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2229 } 2230 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2231 PetscFunctionReturn(0); 2232 } 2233 2234 #undef __FUNCT__ 2235 #define __FUNCT__ "MatCopy_MPIAIJ" 2236 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2237 { 2238 PetscErrorCode ierr; 2239 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2240 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2241 2242 PetscFunctionBegin; 2243 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2244 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2245 /* because of the column compression in the off-processor part of the matrix a->B, 2246 the number of columns in a->B and b->B may be different, hence we cannot call 2247 the MatCopy() directly on the two parts. If need be, we can provide a more 2248 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2249 then copying the submatrices */ 2250 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2251 } else { 2252 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2253 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2254 } 2255 PetscFunctionReturn(0); 2256 } 2257 2258 #undef __FUNCT__ 2259 #define __FUNCT__ "MatSetUp_MPIAIJ" 2260 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2261 { 2262 PetscErrorCode ierr; 2263 2264 PetscFunctionBegin; 2265 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2266 PetscFunctionReturn(0); 2267 } 2268 2269 #undef __FUNCT__ 2270 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2271 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2272 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2273 { 2274 PetscInt i,m=Y->rmap->N; 2275 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2276 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2277 const PetscInt *xi = x->i,*yi = y->i; 2278 2279 PetscFunctionBegin; 2280 /* Set the number of nonzeros in the new matrix */ 2281 for (i=0; i<m; i++) { 2282 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2283 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2284 nnz[i] = 0; 2285 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2286 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2287 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2288 nnz[i]++; 2289 } 2290 for (; k<nzy; k++) nnz[i]++; 2291 } 2292 PetscFunctionReturn(0); 2293 } 2294 2295 #undef __FUNCT__ 2296 #define __FUNCT__ "MatAXPY_MPIAIJ" 2297 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2298 { 2299 PetscErrorCode ierr; 2300 PetscInt i; 2301 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2302 PetscBLASInt bnz,one=1; 2303 Mat_SeqAIJ *x,*y; 2304 2305 PetscFunctionBegin; 2306 if (str == SAME_NONZERO_PATTERN) { 2307 PetscScalar alpha = a; 2308 x = (Mat_SeqAIJ*)xx->A->data; 2309 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2310 y = (Mat_SeqAIJ*)yy->A->data; 2311 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2312 x = (Mat_SeqAIJ*)xx->B->data; 2313 y = (Mat_SeqAIJ*)yy->B->data; 2314 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2315 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2316 } else if (str == SUBSET_NONZERO_PATTERN) { 2317 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2318 2319 x = (Mat_SeqAIJ*)xx->B->data; 2320 y = (Mat_SeqAIJ*)yy->B->data; 2321 if (y->xtoy && y->XtoY != xx->B) { 2322 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2323 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2324 } 2325 if (!y->xtoy) { /* get xtoy */ 2326 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2327 y->XtoY = xx->B; 2328 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2329 } 2330 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2331 } else { 2332 Mat B; 2333 PetscInt *nnz_d,*nnz_o; 2334 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2335 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2336 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2337 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2338 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2339 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2340 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2341 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2342 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2343 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2344 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2345 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2347 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2348 } 2349 PetscFunctionReturn(0); 2350 } 2351 2352 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2353 2354 #undef __FUNCT__ 2355 #define __FUNCT__ "MatConjugate_MPIAIJ" 2356 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2357 { 2358 #if defined(PETSC_USE_COMPLEX) 2359 PetscErrorCode ierr; 2360 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2361 2362 PetscFunctionBegin; 2363 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2364 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2365 #else 2366 PetscFunctionBegin; 2367 #endif 2368 PetscFunctionReturn(0); 2369 } 2370 2371 #undef __FUNCT__ 2372 #define __FUNCT__ "MatRealPart_MPIAIJ" 2373 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2374 { 2375 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2376 PetscErrorCode ierr; 2377 2378 PetscFunctionBegin; 2379 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2380 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2381 PetscFunctionReturn(0); 2382 } 2383 2384 #undef __FUNCT__ 2385 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2386 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2387 { 2388 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2393 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2394 PetscFunctionReturn(0); 2395 } 2396 2397 #if defined(PETSC_HAVE_PBGL) 2398 2399 #include <boost/parallel/mpi/bsp_process_group.hpp> 2400 #include <boost/graph/distributed/ilu_default_graph.hpp> 2401 #include <boost/graph/distributed/ilu_0_block.hpp> 2402 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2403 #include <boost/graph/distributed/petsc/interface.hpp> 2404 #include <boost/multi_array.hpp> 2405 #include <boost/parallel/distributed_property_map->hpp> 2406 2407 #undef __FUNCT__ 2408 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2409 /* 2410 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2411 */ 2412 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2413 { 2414 namespace petsc = boost::distributed::petsc; 2415 2416 namespace graph_dist = boost::graph::distributed; 2417 using boost::graph::distributed::ilu_default::process_group_type; 2418 using boost::graph::ilu_permuted; 2419 2420 PetscBool row_identity, col_identity; 2421 PetscContainer c; 2422 PetscInt m, n, M, N; 2423 PetscErrorCode ierr; 2424 2425 PetscFunctionBegin; 2426 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2427 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2428 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2429 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2430 2431 process_group_type pg; 2432 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2433 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2434 lgraph_type& level_graph = *lgraph_p; 2435 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2436 2437 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2438 ilu_permuted(level_graph); 2439 2440 /* put together the new matrix */ 2441 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2442 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2443 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2444 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2445 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2446 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2447 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2449 2450 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2451 ierr = PetscContainerSetPointer(c, lgraph_p); 2452 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2453 ierr = PetscContainerDestroy(&c); 2454 PetscFunctionReturn(0); 2455 } 2456 2457 #undef __FUNCT__ 2458 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2459 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2460 { 2461 PetscFunctionBegin; 2462 PetscFunctionReturn(0); 2463 } 2464 2465 #undef __FUNCT__ 2466 #define __FUNCT__ "MatSolve_MPIAIJ" 2467 /* 2468 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2469 */ 2470 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2471 { 2472 namespace graph_dist = boost::graph::distributed; 2473 2474 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2475 lgraph_type *lgraph_p; 2476 PetscContainer c; 2477 PetscErrorCode ierr; 2478 2479 PetscFunctionBegin; 2480 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2481 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2482 ierr = VecCopy(b, x);CHKERRQ(ierr); 2483 2484 PetscScalar *array_x; 2485 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2486 PetscInt sx; 2487 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2488 2489 PetscScalar *array_b; 2490 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2491 PetscInt sb; 2492 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2493 2494 lgraph_type& level_graph = *lgraph_p; 2495 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2496 2497 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2498 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2499 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2500 2501 typedef boost::iterator_property_map<array_ref_type::iterator, 2502 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2503 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2504 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2505 2506 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2507 PetscFunctionReturn(0); 2508 } 2509 #endif 2510 2511 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2512 PetscInt nzlocal,nsends,nrecvs; 2513 PetscMPIInt *send_rank,*recv_rank; 2514 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2515 PetscScalar *sbuf_a,**rbuf_a; 2516 PetscErrorCode (*Destroy)(Mat); 2517 } Mat_Redundant; 2518 2519 #undef __FUNCT__ 2520 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2521 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2522 { 2523 PetscErrorCode ierr; 2524 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2525 PetscInt i; 2526 2527 PetscFunctionBegin; 2528 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2529 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2530 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2531 for (i=0; i<redund->nrecvs; i++) { 2532 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2533 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2534 } 2535 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2536 ierr = PetscFree(redund);CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "MatDestroy_MatRedundant" 2542 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2543 { 2544 PetscErrorCode ierr; 2545 PetscContainer container; 2546 Mat_Redundant *redund=NULL; 2547 2548 PetscFunctionBegin; 2549 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2550 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2551 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2552 2553 A->ops->destroy = redund->Destroy; 2554 2555 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2556 if (A->ops->destroy) { 2557 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2558 } 2559 PetscFunctionReturn(0); 2560 } 2561 2562 #undef __FUNCT__ 2563 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2564 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2565 { 2566 PetscMPIInt rank,size; 2567 MPI_Comm comm; 2568 PetscErrorCode ierr; 2569 PetscInt nsends = 0,nrecvs=0,i,rownz_max=0; 2570 PetscMPIInt *send_rank= NULL,*recv_rank=NULL; 2571 PetscInt *rowrange = mat->rmap->range; 2572 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2573 Mat A = aij->A,B=aij->B,C=*matredundant; 2574 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2575 PetscScalar *sbuf_a; 2576 PetscInt nzlocal=a->nz+b->nz; 2577 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2578 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2579 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2580 MatScalar *aworkA,*aworkB; 2581 PetscScalar *vals; 2582 PetscMPIInt tag1,tag2,tag3,imdex; 2583 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2584 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2585 MPI_Status recv_status,*send_status; 2586 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2587 PetscInt **rbuf_j=NULL; 2588 PetscScalar **rbuf_a=NULL; 2589 Mat_Redundant *redund =NULL; 2590 PetscContainer container; 2591 2592 PetscFunctionBegin; 2593 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2594 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2595 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2596 2597 if (reuse == MAT_REUSE_MATRIX) { 2598 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2599 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2600 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2601 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2602 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2603 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2604 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2605 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2606 2607 nsends = redund->nsends; 2608 nrecvs = redund->nrecvs; 2609 send_rank = redund->send_rank; 2610 recv_rank = redund->recv_rank; 2611 sbuf_nz = redund->sbuf_nz; 2612 rbuf_nz = redund->rbuf_nz; 2613 sbuf_j = redund->sbuf_j; 2614 sbuf_a = redund->sbuf_a; 2615 rbuf_j = redund->rbuf_j; 2616 rbuf_a = redund->rbuf_a; 2617 } 2618 2619 if (reuse == MAT_INITIAL_MATRIX) { 2620 PetscMPIInt subrank,subsize; 2621 PetscInt nleftover,np_subcomm; 2622 /* get the destination processors' id send_rank, nsends and nrecvs */ 2623 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2624 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2625 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2626 2627 np_subcomm = size/nsubcomm; 2628 nleftover = size - nsubcomm*np_subcomm; 2629 2630 nsends = 0; nrecvs = 0; 2631 for (i=0; i<size; i++) { /* i=rank*/ 2632 if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */ 2633 send_rank[nsends] = i; nsends++; 2634 recv_rank[nrecvs++] = i; 2635 } 2636 } 2637 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2638 i = size-nleftover-1; 2639 j = 0; 2640 while (j < nsubcomm - nleftover) { 2641 send_rank[nsends++] = i; 2642 i--; j++; 2643 } 2644 } 2645 2646 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2647 for (i=0; i<nleftover; i++) { 2648 recv_rank[nrecvs++] = size-nleftover+i; 2649 } 2650 } 2651 2652 /* allocate sbuf_j, sbuf_a */ 2653 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2654 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2655 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2656 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2657 2658 /* copy mat's local entries into the buffers */ 2659 if (reuse == MAT_INITIAL_MATRIX) { 2660 rownz_max = 0; 2661 rptr = sbuf_j; 2662 cols = sbuf_j + rend-rstart + 1; 2663 vals = sbuf_a; 2664 rptr[0] = 0; 2665 for (i=0; i<rend-rstart; i++) { 2666 row = i + rstart; 2667 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2668 ncols = nzA + nzB; 2669 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2670 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2671 /* load the column indices for this row into cols */ 2672 lwrite = 0; 2673 for (l=0; l<nzB; l++) { 2674 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2675 vals[lwrite] = aworkB[l]; 2676 cols[lwrite++] = ctmp; 2677 } 2678 } 2679 for (l=0; l<nzA; l++) { 2680 vals[lwrite] = aworkA[l]; 2681 cols[lwrite++] = cstart + cworkA[l]; 2682 } 2683 for (l=0; l<nzB; l++) { 2684 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2685 vals[lwrite] = aworkB[l]; 2686 cols[lwrite++] = ctmp; 2687 } 2688 } 2689 vals += ncols; 2690 cols += ncols; 2691 rptr[i+1] = rptr[i] + ncols; 2692 if (rownz_max < ncols) rownz_max = ncols; 2693 } 2694 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2695 } else { /* only copy matrix values into sbuf_a */ 2696 rptr = sbuf_j; 2697 vals = sbuf_a; 2698 rptr[0] = 0; 2699 for (i=0; i<rend-rstart; i++) { 2700 row = i + rstart; 2701 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2702 ncols = nzA + nzB; 2703 cworkB = b->j + b->i[i]; 2704 aworkA = a->a + a->i[i]; 2705 aworkB = b->a + b->i[i]; 2706 lwrite = 0; 2707 for (l=0; l<nzB; l++) { 2708 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2709 } 2710 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2711 for (l=0; l<nzB; l++) { 2712 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2713 } 2714 vals += ncols; 2715 rptr[i+1] = rptr[i] + ncols; 2716 } 2717 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2718 2719 /* send nzlocal to others, and recv other's nzlocal */ 2720 /*--------------------------------------------------*/ 2721 if (reuse == MAT_INITIAL_MATRIX) { 2722 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2723 2724 s_waits2 = s_waits3 + nsends; 2725 s_waits1 = s_waits2 + nsends; 2726 r_waits1 = s_waits1 + nsends; 2727 r_waits2 = r_waits1 + nrecvs; 2728 r_waits3 = r_waits2 + nrecvs; 2729 } else { 2730 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2731 2732 r_waits3 = s_waits3 + nsends; 2733 } 2734 2735 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2736 if (reuse == MAT_INITIAL_MATRIX) { 2737 /* get new tags to keep the communication clean */ 2738 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2739 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2740 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2741 2742 /* post receives of other's nzlocal */ 2743 for (i=0; i<nrecvs; i++) { 2744 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2745 } 2746 /* send nzlocal to others */ 2747 for (i=0; i<nsends; i++) { 2748 sbuf_nz[i] = nzlocal; 2749 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2750 } 2751 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2752 count = nrecvs; 2753 while (count) { 2754 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2755 2756 recv_rank[imdex] = recv_status.MPI_SOURCE; 2757 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2758 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2759 2760 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2761 2762 rbuf_nz[imdex] += i + 2; 2763 2764 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2765 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2766 count--; 2767 } 2768 /* wait on sends of nzlocal */ 2769 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2770 /* send mat->i,j to others, and recv from other's */ 2771 /*------------------------------------------------*/ 2772 for (i=0; i<nsends; i++) { 2773 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2774 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2775 } 2776 /* wait on receives of mat->i,j */ 2777 /*------------------------------*/ 2778 count = nrecvs; 2779 while (count) { 2780 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2781 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2782 count--; 2783 } 2784 /* wait on sends of mat->i,j */ 2785 /*---------------------------*/ 2786 if (nsends) { 2787 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2788 } 2789 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2790 2791 /* post receives, send and receive mat->a */ 2792 /*----------------------------------------*/ 2793 for (imdex=0; imdex<nrecvs; imdex++) { 2794 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2795 } 2796 for (i=0; i<nsends; i++) { 2797 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2798 } 2799 count = nrecvs; 2800 while (count) { 2801 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2802 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2803 count--; 2804 } 2805 if (nsends) { 2806 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2807 } 2808 2809 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2810 2811 /* create redundant matrix */ 2812 /*-------------------------*/ 2813 if (reuse == MAT_INITIAL_MATRIX) { 2814 /* compute rownz_max for preallocation */ 2815 for (imdex=0; imdex<nrecvs; imdex++) { 2816 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2817 rptr = rbuf_j[imdex]; 2818 for (i=0; i<j; i++) { 2819 ncols = rptr[i+1] - rptr[i]; 2820 if (rownz_max < ncols) rownz_max = ncols; 2821 } 2822 } 2823 2824 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2825 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2826 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2827 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2828 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2829 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2830 } else { 2831 C = *matredundant; 2832 } 2833 2834 /* insert local matrix entries */ 2835 rptr = sbuf_j; 2836 cols = sbuf_j + rend-rstart + 1; 2837 vals = sbuf_a; 2838 for (i=0; i<rend-rstart; i++) { 2839 row = i + rstart; 2840 ncols = rptr[i+1] - rptr[i]; 2841 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2842 vals += ncols; 2843 cols += ncols; 2844 } 2845 /* insert received matrix entries */ 2846 for (imdex=0; imdex<nrecvs; imdex++) { 2847 rstart = rowrange[recv_rank[imdex]]; 2848 rend = rowrange[recv_rank[imdex]+1]; 2849 rptr = rbuf_j[imdex]; 2850 cols = rbuf_j[imdex] + rend-rstart + 1; 2851 vals = rbuf_a[imdex]; 2852 for (i=0; i<rend-rstart; i++) { 2853 row = i + rstart; 2854 ncols = rptr[i+1] - rptr[i]; 2855 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2856 vals += ncols; 2857 cols += ncols; 2858 } 2859 } 2860 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2861 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2862 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2863 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2864 if (reuse == MAT_INITIAL_MATRIX) { 2865 PetscContainer container; 2866 *matredundant = C; 2867 /* create a supporting struct and attach it to C for reuse */ 2868 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2869 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2870 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2871 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2872 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2873 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2874 2875 redund->nzlocal = nzlocal; 2876 redund->nsends = nsends; 2877 redund->nrecvs = nrecvs; 2878 redund->send_rank = send_rank; 2879 redund->recv_rank = recv_rank; 2880 redund->sbuf_nz = sbuf_nz; 2881 redund->rbuf_nz = rbuf_nz; 2882 redund->sbuf_j = sbuf_j; 2883 redund->sbuf_a = sbuf_a; 2884 redund->rbuf_j = rbuf_j; 2885 redund->rbuf_a = rbuf_a; 2886 2887 redund->Destroy = C->ops->destroy; 2888 C->ops->destroy = MatDestroy_MatRedundant; 2889 } 2890 PetscFunctionReturn(0); 2891 } 2892 2893 #undef __FUNCT__ 2894 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2895 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2896 { 2897 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2898 PetscErrorCode ierr; 2899 PetscInt i,*idxb = 0; 2900 PetscScalar *va,*vb; 2901 Vec vtmp; 2902 2903 PetscFunctionBegin; 2904 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2905 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2906 if (idx) { 2907 for (i=0; i<A->rmap->n; i++) { 2908 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2909 } 2910 } 2911 2912 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2913 if (idx) { 2914 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2915 } 2916 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2917 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2918 2919 for (i=0; i<A->rmap->n; i++) { 2920 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2921 va[i] = vb[i]; 2922 if (idx) idx[i] = a->garray[idxb[i]]; 2923 } 2924 } 2925 2926 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2927 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2928 ierr = PetscFree(idxb);CHKERRQ(ierr); 2929 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2930 PetscFunctionReturn(0); 2931 } 2932 2933 #undef __FUNCT__ 2934 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2935 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2936 { 2937 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2938 PetscErrorCode ierr; 2939 PetscInt i,*idxb = 0; 2940 PetscScalar *va,*vb; 2941 Vec vtmp; 2942 2943 PetscFunctionBegin; 2944 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2945 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2946 if (idx) { 2947 for (i=0; i<A->cmap->n; i++) { 2948 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2949 } 2950 } 2951 2952 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2953 if (idx) { 2954 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2955 } 2956 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2957 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2958 2959 for (i=0; i<A->rmap->n; i++) { 2960 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2961 va[i] = vb[i]; 2962 if (idx) idx[i] = a->garray[idxb[i]]; 2963 } 2964 } 2965 2966 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2967 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2968 ierr = PetscFree(idxb);CHKERRQ(ierr); 2969 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2970 PetscFunctionReturn(0); 2971 } 2972 2973 #undef __FUNCT__ 2974 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2975 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2976 { 2977 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2978 PetscInt n = A->rmap->n; 2979 PetscInt cstart = A->cmap->rstart; 2980 PetscInt *cmap = mat->garray; 2981 PetscInt *diagIdx, *offdiagIdx; 2982 Vec diagV, offdiagV; 2983 PetscScalar *a, *diagA, *offdiagA; 2984 PetscInt r; 2985 PetscErrorCode ierr; 2986 2987 PetscFunctionBegin; 2988 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2989 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2990 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2991 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2992 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2993 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2994 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2995 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2996 for (r = 0; r < n; ++r) { 2997 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2998 a[r] = diagA[r]; 2999 idx[r] = cstart + diagIdx[r]; 3000 } else { 3001 a[r] = offdiagA[r]; 3002 idx[r] = cmap[offdiagIdx[r]]; 3003 } 3004 } 3005 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3006 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3007 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3008 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3009 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3010 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3011 PetscFunctionReturn(0); 3012 } 3013 3014 #undef __FUNCT__ 3015 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3016 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3017 { 3018 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3019 PetscInt n = A->rmap->n; 3020 PetscInt cstart = A->cmap->rstart; 3021 PetscInt *cmap = mat->garray; 3022 PetscInt *diagIdx, *offdiagIdx; 3023 Vec diagV, offdiagV; 3024 PetscScalar *a, *diagA, *offdiagA; 3025 PetscInt r; 3026 PetscErrorCode ierr; 3027 3028 PetscFunctionBegin; 3029 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3030 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3031 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3032 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3033 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3034 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3035 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3036 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3037 for (r = 0; r < n; ++r) { 3038 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3039 a[r] = diagA[r]; 3040 idx[r] = cstart + diagIdx[r]; 3041 } else { 3042 a[r] = offdiagA[r]; 3043 idx[r] = cmap[offdiagIdx[r]]; 3044 } 3045 } 3046 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3047 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3048 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3049 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3050 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3051 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3052 PetscFunctionReturn(0); 3053 } 3054 3055 #undef __FUNCT__ 3056 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3057 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3058 { 3059 PetscErrorCode ierr; 3060 Mat *dummy; 3061 3062 PetscFunctionBegin; 3063 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3064 *newmat = *dummy; 3065 ierr = PetscFree(dummy);CHKERRQ(ierr); 3066 PetscFunctionReturn(0); 3067 } 3068 3069 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3070 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3073 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3074 { 3075 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3076 PetscErrorCode ierr; 3077 3078 PetscFunctionBegin; 3079 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3080 PetscFunctionReturn(0); 3081 } 3082 3083 #undef __FUNCT__ 3084 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3085 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3086 { 3087 PetscErrorCode ierr; 3088 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3089 3090 PetscFunctionBegin; 3091 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3092 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3093 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3094 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3095 PetscFunctionReturn(0); 3096 } 3097 3098 /* -------------------------------------------------------------------*/ 3099 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3100 MatGetRow_MPIAIJ, 3101 MatRestoreRow_MPIAIJ, 3102 MatMult_MPIAIJ, 3103 /* 4*/ MatMultAdd_MPIAIJ, 3104 MatMultTranspose_MPIAIJ, 3105 MatMultTransposeAdd_MPIAIJ, 3106 #if defined(PETSC_HAVE_PBGL) 3107 MatSolve_MPIAIJ, 3108 #else 3109 0, 3110 #endif 3111 0, 3112 0, 3113 /*10*/ 0, 3114 0, 3115 0, 3116 MatSOR_MPIAIJ, 3117 MatTranspose_MPIAIJ, 3118 /*15*/ MatGetInfo_MPIAIJ, 3119 MatEqual_MPIAIJ, 3120 MatGetDiagonal_MPIAIJ, 3121 MatDiagonalScale_MPIAIJ, 3122 MatNorm_MPIAIJ, 3123 /*20*/ MatAssemblyBegin_MPIAIJ, 3124 MatAssemblyEnd_MPIAIJ, 3125 MatSetOption_MPIAIJ, 3126 MatZeroEntries_MPIAIJ, 3127 /*24*/ MatZeroRows_MPIAIJ, 3128 0, 3129 #if defined(PETSC_HAVE_PBGL) 3130 0, 3131 #else 3132 0, 3133 #endif 3134 0, 3135 0, 3136 /*29*/ MatSetUp_MPIAIJ, 3137 #if defined(PETSC_HAVE_PBGL) 3138 0, 3139 #else 3140 0, 3141 #endif 3142 0, 3143 0, 3144 0, 3145 /*34*/ MatDuplicate_MPIAIJ, 3146 0, 3147 0, 3148 0, 3149 0, 3150 /*39*/ MatAXPY_MPIAIJ, 3151 MatGetSubMatrices_MPIAIJ, 3152 MatIncreaseOverlap_MPIAIJ, 3153 MatGetValues_MPIAIJ, 3154 MatCopy_MPIAIJ, 3155 /*44*/ MatGetRowMax_MPIAIJ, 3156 MatScale_MPIAIJ, 3157 0, 3158 0, 3159 MatZeroRowsColumns_MPIAIJ, 3160 /*49*/ MatSetRandom_MPIAIJ, 3161 0, 3162 0, 3163 0, 3164 0, 3165 /*54*/ MatFDColoringCreate_MPIAIJ, 3166 0, 3167 MatSetUnfactored_MPIAIJ, 3168 MatPermute_MPIAIJ, 3169 0, 3170 /*59*/ MatGetSubMatrix_MPIAIJ, 3171 MatDestroy_MPIAIJ, 3172 MatView_MPIAIJ, 3173 0, 3174 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3175 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3176 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3177 0, 3178 0, 3179 0, 3180 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3181 MatGetRowMinAbs_MPIAIJ, 3182 0, 3183 MatSetColoring_MPIAIJ, 3184 0, 3185 MatSetValuesAdifor_MPIAIJ, 3186 /*75*/ MatFDColoringApply_AIJ, 3187 0, 3188 0, 3189 0, 3190 MatFindZeroDiagonals_MPIAIJ, 3191 /*80*/ 0, 3192 0, 3193 0, 3194 /*83*/ MatLoad_MPIAIJ, 3195 0, 3196 0, 3197 0, 3198 0, 3199 0, 3200 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3201 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3202 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3203 MatPtAP_MPIAIJ_MPIAIJ, 3204 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3205 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3206 0, 3207 0, 3208 0, 3209 0, 3210 /*99*/ 0, 3211 0, 3212 0, 3213 MatConjugate_MPIAIJ, 3214 0, 3215 /*104*/MatSetValuesRow_MPIAIJ, 3216 MatRealPart_MPIAIJ, 3217 MatImaginaryPart_MPIAIJ, 3218 0, 3219 0, 3220 /*109*/0, 3221 MatGetRedundantMatrix_MPIAIJ, 3222 MatGetRowMin_MPIAIJ, 3223 0, 3224 0, 3225 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3226 0, 3227 0, 3228 0, 3229 0, 3230 /*119*/0, 3231 0, 3232 0, 3233 0, 3234 MatGetMultiProcBlock_MPIAIJ, 3235 /*124*/MatFindNonzeroRows_MPIAIJ, 3236 MatGetColumnNorms_MPIAIJ, 3237 MatInvertBlockDiagonal_MPIAIJ, 3238 0, 3239 MatGetSubMatricesParallel_MPIAIJ, 3240 /*129*/0, 3241 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3242 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3243 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3244 0, 3245 /*134*/0, 3246 0, 3247 0, 3248 0, 3249 0, 3250 /*139*/0, 3251 0 3252 }; 3253 3254 /* ----------------------------------------------------------------------------------------*/ 3255 3256 #undef __FUNCT__ 3257 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3258 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3259 { 3260 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3261 PetscErrorCode ierr; 3262 3263 PetscFunctionBegin; 3264 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3265 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3266 PetscFunctionReturn(0); 3267 } 3268 3269 #undef __FUNCT__ 3270 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3271 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3272 { 3273 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3274 PetscErrorCode ierr; 3275 3276 PetscFunctionBegin; 3277 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3278 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3279 PetscFunctionReturn(0); 3280 } 3281 3282 #undef __FUNCT__ 3283 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3284 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3285 { 3286 Mat_MPIAIJ *b; 3287 PetscErrorCode ierr; 3288 PetscInt i; 3289 3290 PetscFunctionBegin; 3291 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3292 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3293 if (d_nnz) { 3294 for (i=0; i<B->rmap->n; i++) { 3295 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3296 } 3297 } 3298 if (o_nnz) { 3299 for (i=0; i<B->rmap->n; i++) { 3300 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3301 } 3302 } 3303 b = (Mat_MPIAIJ*)B->data; 3304 3305 if (!B->preallocated) { 3306 /* Explicitly create 2 MATSEQAIJ matrices. */ 3307 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3308 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3309 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3310 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3311 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3312 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3313 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3314 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3315 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3316 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3317 } 3318 3319 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3320 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3321 B->preallocated = PETSC_TRUE; 3322 PetscFunctionReturn(0); 3323 } 3324 3325 #undef __FUNCT__ 3326 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3327 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3328 { 3329 Mat mat; 3330 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3331 PetscErrorCode ierr; 3332 3333 PetscFunctionBegin; 3334 *newmat = 0; 3335 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3336 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3337 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3338 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3339 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3340 a = (Mat_MPIAIJ*)mat->data; 3341 3342 mat->factortype = matin->factortype; 3343 mat->rmap->bs = matin->rmap->bs; 3344 mat->cmap->bs = matin->cmap->bs; 3345 mat->assembled = PETSC_TRUE; 3346 mat->insertmode = NOT_SET_VALUES; 3347 mat->preallocated = PETSC_TRUE; 3348 3349 a->size = oldmat->size; 3350 a->rank = oldmat->rank; 3351 a->donotstash = oldmat->donotstash; 3352 a->roworiented = oldmat->roworiented; 3353 a->rowindices = 0; 3354 a->rowvalues = 0; 3355 a->getrowactive = PETSC_FALSE; 3356 3357 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3358 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3359 3360 if (oldmat->colmap) { 3361 #if defined(PETSC_USE_CTABLE) 3362 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3363 #else 3364 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3365 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3366 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3367 #endif 3368 } else a->colmap = 0; 3369 if (oldmat->garray) { 3370 PetscInt len; 3371 len = oldmat->B->cmap->n; 3372 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3373 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3374 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3375 } else a->garray = 0; 3376 3377 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3378 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3379 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3380 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3381 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3382 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3383 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3384 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3385 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3386 *newmat = mat; 3387 PetscFunctionReturn(0); 3388 } 3389 3390 3391 3392 #undef __FUNCT__ 3393 #define __FUNCT__ "MatLoad_MPIAIJ" 3394 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3395 { 3396 PetscScalar *vals,*svals; 3397 MPI_Comm comm; 3398 PetscErrorCode ierr; 3399 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3400 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3401 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3402 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3403 PetscInt cend,cstart,n,*rowners,sizesset=1; 3404 int fd; 3405 PetscInt bs = 1; 3406 3407 PetscFunctionBegin; 3408 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3409 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3410 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3411 if (!rank) { 3412 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3413 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3414 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3415 } 3416 3417 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3418 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3419 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3420 3421 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3422 3423 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3424 M = header[1]; N = header[2]; 3425 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3426 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3427 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3428 3429 /* If global sizes are set, check if they are consistent with that given in the file */ 3430 if (sizesset) { 3431 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3432 } 3433 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3434 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3435 3436 /* determine ownership of all (block) rows */ 3437 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3438 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3439 else m = newMat->rmap->n; /* Set by user */ 3440 3441 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3442 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3443 3444 /* First process needs enough room for process with most rows */ 3445 if (!rank) { 3446 mmax = rowners[1]; 3447 for (i=2; i<=size; i++) { 3448 mmax = PetscMax(mmax, rowners[i]); 3449 } 3450 } else mmax = -1; /* unused, but compilers complain */ 3451 3452 rowners[0] = 0; 3453 for (i=2; i<=size; i++) { 3454 rowners[i] += rowners[i-1]; 3455 } 3456 rstart = rowners[rank]; 3457 rend = rowners[rank+1]; 3458 3459 /* distribute row lengths to all processors */ 3460 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3461 if (!rank) { 3462 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3463 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3464 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3465 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3466 for (j=0; j<m; j++) { 3467 procsnz[0] += ourlens[j]; 3468 } 3469 for (i=1; i<size; i++) { 3470 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3471 /* calculate the number of nonzeros on each processor */ 3472 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3473 procsnz[i] += rowlengths[j]; 3474 } 3475 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3476 } 3477 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3478 } else { 3479 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3480 } 3481 3482 if (!rank) { 3483 /* determine max buffer needed and allocate it */ 3484 maxnz = 0; 3485 for (i=0; i<size; i++) { 3486 maxnz = PetscMax(maxnz,procsnz[i]); 3487 } 3488 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3489 3490 /* read in my part of the matrix column indices */ 3491 nz = procsnz[0]; 3492 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3493 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3494 3495 /* read in every one elses and ship off */ 3496 for (i=1; i<size; i++) { 3497 nz = procsnz[i]; 3498 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3499 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3500 } 3501 ierr = PetscFree(cols);CHKERRQ(ierr); 3502 } else { 3503 /* determine buffer space needed for message */ 3504 nz = 0; 3505 for (i=0; i<m; i++) { 3506 nz += ourlens[i]; 3507 } 3508 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3509 3510 /* receive message of column indices*/ 3511 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3512 } 3513 3514 /* determine column ownership if matrix is not square */ 3515 if (N != M) { 3516 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3517 else n = newMat->cmap->n; 3518 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3519 cstart = cend - n; 3520 } else { 3521 cstart = rstart; 3522 cend = rend; 3523 n = cend - cstart; 3524 } 3525 3526 /* loop over local rows, determining number of off diagonal entries */ 3527 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3528 jj = 0; 3529 for (i=0; i<m; i++) { 3530 for (j=0; j<ourlens[i]; j++) { 3531 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3532 jj++; 3533 } 3534 } 3535 3536 for (i=0; i<m; i++) { 3537 ourlens[i] -= offlens[i]; 3538 } 3539 if (!sizesset) { 3540 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3541 } 3542 3543 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3544 3545 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3546 3547 for (i=0; i<m; i++) { 3548 ourlens[i] += offlens[i]; 3549 } 3550 3551 if (!rank) { 3552 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3553 3554 /* read in my part of the matrix numerical values */ 3555 nz = procsnz[0]; 3556 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3557 3558 /* insert into matrix */ 3559 jj = rstart; 3560 smycols = mycols; 3561 svals = vals; 3562 for (i=0; i<m; i++) { 3563 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3564 smycols += ourlens[i]; 3565 svals += ourlens[i]; 3566 jj++; 3567 } 3568 3569 /* read in other processors and ship out */ 3570 for (i=1; i<size; i++) { 3571 nz = procsnz[i]; 3572 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3573 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3574 } 3575 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3576 } else { 3577 /* receive numeric values */ 3578 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3579 3580 /* receive message of values*/ 3581 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3582 3583 /* insert into matrix */ 3584 jj = rstart; 3585 smycols = mycols; 3586 svals = vals; 3587 for (i=0; i<m; i++) { 3588 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3589 smycols += ourlens[i]; 3590 svals += ourlens[i]; 3591 jj++; 3592 } 3593 } 3594 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3595 ierr = PetscFree(vals);CHKERRQ(ierr); 3596 ierr = PetscFree(mycols);CHKERRQ(ierr); 3597 ierr = PetscFree(rowners);CHKERRQ(ierr); 3598 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3599 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3600 PetscFunctionReturn(0); 3601 } 3602 3603 #undef __FUNCT__ 3604 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3605 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3606 { 3607 PetscErrorCode ierr; 3608 IS iscol_local; 3609 PetscInt csize; 3610 3611 PetscFunctionBegin; 3612 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3613 if (call == MAT_REUSE_MATRIX) { 3614 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3615 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3616 } else { 3617 PetscInt cbs; 3618 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3619 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3620 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3621 } 3622 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3623 if (call == MAT_INITIAL_MATRIX) { 3624 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3625 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3626 } 3627 PetscFunctionReturn(0); 3628 } 3629 3630 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3631 #undef __FUNCT__ 3632 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3633 /* 3634 Not great since it makes two copies of the submatrix, first an SeqAIJ 3635 in local and then by concatenating the local matrices the end result. 3636 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3637 3638 Note: This requires a sequential iscol with all indices. 3639 */ 3640 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3641 { 3642 PetscErrorCode ierr; 3643 PetscMPIInt rank,size; 3644 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3645 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3646 PetscBool allcolumns, colflag; 3647 Mat M,Mreuse; 3648 MatScalar *vwork,*aa; 3649 MPI_Comm comm; 3650 Mat_SeqAIJ *aij; 3651 3652 PetscFunctionBegin; 3653 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3654 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3655 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3656 3657 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3658 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3659 if (colflag && ncol == mat->cmap->N) { 3660 allcolumns = PETSC_TRUE; 3661 } else { 3662 allcolumns = PETSC_FALSE; 3663 } 3664 if (call == MAT_REUSE_MATRIX) { 3665 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3666 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3667 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3668 } else { 3669 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3670 } 3671 3672 /* 3673 m - number of local rows 3674 n - number of columns (same on all processors) 3675 rstart - first row in new global matrix generated 3676 */ 3677 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3678 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3679 if (call == MAT_INITIAL_MATRIX) { 3680 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3681 ii = aij->i; 3682 jj = aij->j; 3683 3684 /* 3685 Determine the number of non-zeros in the diagonal and off-diagonal 3686 portions of the matrix in order to do correct preallocation 3687 */ 3688 3689 /* first get start and end of "diagonal" columns */ 3690 if (csize == PETSC_DECIDE) { 3691 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3692 if (mglobal == n) { /* square matrix */ 3693 nlocal = m; 3694 } else { 3695 nlocal = n/size + ((n % size) > rank); 3696 } 3697 } else { 3698 nlocal = csize; 3699 } 3700 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3701 rstart = rend - nlocal; 3702 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3703 3704 /* next, compute all the lengths */ 3705 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3706 olens = dlens + m; 3707 for (i=0; i<m; i++) { 3708 jend = ii[i+1] - ii[i]; 3709 olen = 0; 3710 dlen = 0; 3711 for (j=0; j<jend; j++) { 3712 if (*jj < rstart || *jj >= rend) olen++; 3713 else dlen++; 3714 jj++; 3715 } 3716 olens[i] = olen; 3717 dlens[i] = dlen; 3718 } 3719 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3720 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3721 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3722 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3723 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3724 ierr = PetscFree(dlens);CHKERRQ(ierr); 3725 } else { 3726 PetscInt ml,nl; 3727 3728 M = *newmat; 3729 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3730 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3731 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3732 /* 3733 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3734 rather than the slower MatSetValues(). 3735 */ 3736 M->was_assembled = PETSC_TRUE; 3737 M->assembled = PETSC_FALSE; 3738 } 3739 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3740 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3741 ii = aij->i; 3742 jj = aij->j; 3743 aa = aij->a; 3744 for (i=0; i<m; i++) { 3745 row = rstart + i; 3746 nz = ii[i+1] - ii[i]; 3747 cwork = jj; jj += nz; 3748 vwork = aa; aa += nz; 3749 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3750 } 3751 3752 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3753 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3754 *newmat = M; 3755 3756 /* save submatrix used in processor for next request */ 3757 if (call == MAT_INITIAL_MATRIX) { 3758 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3759 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3760 } 3761 PetscFunctionReturn(0); 3762 } 3763 3764 #undef __FUNCT__ 3765 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3766 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3767 { 3768 PetscInt m,cstart, cend,j,nnz,i,d; 3769 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3770 const PetscInt *JJ; 3771 PetscScalar *values; 3772 PetscErrorCode ierr; 3773 3774 PetscFunctionBegin; 3775 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3776 3777 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3778 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3779 m = B->rmap->n; 3780 cstart = B->cmap->rstart; 3781 cend = B->cmap->rend; 3782 rstart = B->rmap->rstart; 3783 3784 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3785 3786 #if defined(PETSC_USE_DEBUGGING) 3787 for (i=0; i<m; i++) { 3788 nnz = Ii[i+1]- Ii[i]; 3789 JJ = J + Ii[i]; 3790 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3791 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3792 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3793 } 3794 #endif 3795 3796 for (i=0; i<m; i++) { 3797 nnz = Ii[i+1]- Ii[i]; 3798 JJ = J + Ii[i]; 3799 nnz_max = PetscMax(nnz_max,nnz); 3800 d = 0; 3801 for (j=0; j<nnz; j++) { 3802 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3803 } 3804 d_nnz[i] = d; 3805 o_nnz[i] = nnz - d; 3806 } 3807 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3808 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3809 3810 if (v) values = (PetscScalar*)v; 3811 else { 3812 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3813 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3814 } 3815 3816 for (i=0; i<m; i++) { 3817 ii = i + rstart; 3818 nnz = Ii[i+1]- Ii[i]; 3819 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3820 } 3821 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3822 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3823 3824 if (!v) { 3825 ierr = PetscFree(values);CHKERRQ(ierr); 3826 } 3827 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3828 PetscFunctionReturn(0); 3829 } 3830 3831 #undef __FUNCT__ 3832 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3833 /*@ 3834 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3835 (the default parallel PETSc format). 3836 3837 Collective on MPI_Comm 3838 3839 Input Parameters: 3840 + B - the matrix 3841 . i - the indices into j for the start of each local row (starts with zero) 3842 . j - the column indices for each local row (starts with zero) 3843 - v - optional values in the matrix 3844 3845 Level: developer 3846 3847 Notes: 3848 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3849 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3850 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3851 3852 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3853 3854 The format which is used for the sparse matrix input, is equivalent to a 3855 row-major ordering.. i.e for the following matrix, the input data expected is 3856 as shown: 3857 3858 1 0 0 3859 2 0 3 P0 3860 ------- 3861 4 5 6 P1 3862 3863 Process0 [P0]: rows_owned=[0,1] 3864 i = {0,1,3} [size = nrow+1 = 2+1] 3865 j = {0,0,2} [size = nz = 6] 3866 v = {1,2,3} [size = nz = 6] 3867 3868 Process1 [P1]: rows_owned=[2] 3869 i = {0,3} [size = nrow+1 = 1+1] 3870 j = {0,1,2} [size = nz = 6] 3871 v = {4,5,6} [size = nz = 6] 3872 3873 .keywords: matrix, aij, compressed row, sparse, parallel 3874 3875 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3876 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3877 @*/ 3878 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3879 { 3880 PetscErrorCode ierr; 3881 3882 PetscFunctionBegin; 3883 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3884 PetscFunctionReturn(0); 3885 } 3886 3887 #undef __FUNCT__ 3888 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3889 /*@C 3890 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3891 (the default parallel PETSc format). For good matrix assembly performance 3892 the user should preallocate the matrix storage by setting the parameters 3893 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3894 performance can be increased by more than a factor of 50. 3895 3896 Collective on MPI_Comm 3897 3898 Input Parameters: 3899 + A - the matrix 3900 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3901 (same value is used for all local rows) 3902 . d_nnz - array containing the number of nonzeros in the various rows of the 3903 DIAGONAL portion of the local submatrix (possibly different for each row) 3904 or NULL, if d_nz is used to specify the nonzero structure. 3905 The size of this array is equal to the number of local rows, i.e 'm'. 3906 For matrices that will be factored, you must leave room for (and set) 3907 the diagonal entry even if it is zero. 3908 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3909 submatrix (same value is used for all local rows). 3910 - o_nnz - array containing the number of nonzeros in the various rows of the 3911 OFF-DIAGONAL portion of the local submatrix (possibly different for 3912 each row) or NULL, if o_nz is used to specify the nonzero 3913 structure. The size of this array is equal to the number 3914 of local rows, i.e 'm'. 3915 3916 If the *_nnz parameter is given then the *_nz parameter is ignored 3917 3918 The AIJ format (also called the Yale sparse matrix format or 3919 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3920 storage. The stored row and column indices begin with zero. 3921 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3922 3923 The parallel matrix is partitioned such that the first m0 rows belong to 3924 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3925 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3926 3927 The DIAGONAL portion of the local submatrix of a processor can be defined 3928 as the submatrix which is obtained by extraction the part corresponding to 3929 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3930 first row that belongs to the processor, r2 is the last row belonging to 3931 the this processor, and c1-c2 is range of indices of the local part of a 3932 vector suitable for applying the matrix to. This is an mxn matrix. In the 3933 common case of a square matrix, the row and column ranges are the same and 3934 the DIAGONAL part is also square. The remaining portion of the local 3935 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3936 3937 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3938 3939 You can call MatGetInfo() to get information on how effective the preallocation was; 3940 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3941 You can also run with the option -info and look for messages with the string 3942 malloc in them to see if additional memory allocation was needed. 3943 3944 Example usage: 3945 3946 Consider the following 8x8 matrix with 34 non-zero values, that is 3947 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3948 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3949 as follows: 3950 3951 .vb 3952 1 2 0 | 0 3 0 | 0 4 3953 Proc0 0 5 6 | 7 0 0 | 8 0 3954 9 0 10 | 11 0 0 | 12 0 3955 ------------------------------------- 3956 13 0 14 | 15 16 17 | 0 0 3957 Proc1 0 18 0 | 19 20 21 | 0 0 3958 0 0 0 | 22 23 0 | 24 0 3959 ------------------------------------- 3960 Proc2 25 26 27 | 0 0 28 | 29 0 3961 30 0 0 | 31 32 33 | 0 34 3962 .ve 3963 3964 This can be represented as a collection of submatrices as: 3965 3966 .vb 3967 A B C 3968 D E F 3969 G H I 3970 .ve 3971 3972 Where the submatrices A,B,C are owned by proc0, D,E,F are 3973 owned by proc1, G,H,I are owned by proc2. 3974 3975 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3976 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3977 The 'M','N' parameters are 8,8, and have the same values on all procs. 3978 3979 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3980 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3981 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3982 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3983 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3984 matrix, ans [DF] as another SeqAIJ matrix. 3985 3986 When d_nz, o_nz parameters are specified, d_nz storage elements are 3987 allocated for every row of the local diagonal submatrix, and o_nz 3988 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3989 One way to choose d_nz and o_nz is to use the max nonzerors per local 3990 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3991 In this case, the values of d_nz,o_nz are: 3992 .vb 3993 proc0 : dnz = 2, o_nz = 2 3994 proc1 : dnz = 3, o_nz = 2 3995 proc2 : dnz = 1, o_nz = 4 3996 .ve 3997 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3998 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3999 for proc3. i.e we are using 12+15+10=37 storage locations to store 4000 34 values. 4001 4002 When d_nnz, o_nnz parameters are specified, the storage is specified 4003 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4004 In the above case the values for d_nnz,o_nnz are: 4005 .vb 4006 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4007 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4008 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4009 .ve 4010 Here the space allocated is sum of all the above values i.e 34, and 4011 hence pre-allocation is perfect. 4012 4013 Level: intermediate 4014 4015 .keywords: matrix, aij, compressed row, sparse, parallel 4016 4017 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4018 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4019 @*/ 4020 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4021 { 4022 PetscErrorCode ierr; 4023 4024 PetscFunctionBegin; 4025 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4026 PetscValidType(B,1); 4027 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4028 PetscFunctionReturn(0); 4029 } 4030 4031 #undef __FUNCT__ 4032 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4033 /*@ 4034 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4035 CSR format the local rows. 4036 4037 Collective on MPI_Comm 4038 4039 Input Parameters: 4040 + comm - MPI communicator 4041 . m - number of local rows (Cannot be PETSC_DECIDE) 4042 . n - This value should be the same as the local size used in creating the 4043 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4044 calculated if N is given) For square matrices n is almost always m. 4045 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4046 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4047 . i - row indices 4048 . j - column indices 4049 - a - matrix values 4050 4051 Output Parameter: 4052 . mat - the matrix 4053 4054 Level: intermediate 4055 4056 Notes: 4057 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4058 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4059 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4060 4061 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4062 4063 The format which is used for the sparse matrix input, is equivalent to a 4064 row-major ordering.. i.e for the following matrix, the input data expected is 4065 as shown: 4066 4067 1 0 0 4068 2 0 3 P0 4069 ------- 4070 4 5 6 P1 4071 4072 Process0 [P0]: rows_owned=[0,1] 4073 i = {0,1,3} [size = nrow+1 = 2+1] 4074 j = {0,0,2} [size = nz = 6] 4075 v = {1,2,3} [size = nz = 6] 4076 4077 Process1 [P1]: rows_owned=[2] 4078 i = {0,3} [size = nrow+1 = 1+1] 4079 j = {0,1,2} [size = nz = 6] 4080 v = {4,5,6} [size = nz = 6] 4081 4082 .keywords: matrix, aij, compressed row, sparse, parallel 4083 4084 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4085 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4086 @*/ 4087 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4088 { 4089 PetscErrorCode ierr; 4090 4091 PetscFunctionBegin; 4092 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4093 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4094 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4095 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4096 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4097 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4098 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4099 PetscFunctionReturn(0); 4100 } 4101 4102 #undef __FUNCT__ 4103 #define __FUNCT__ "MatCreateAIJ" 4104 /*@C 4105 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4106 (the default parallel PETSc format). For good matrix assembly performance 4107 the user should preallocate the matrix storage by setting the parameters 4108 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4109 performance can be increased by more than a factor of 50. 4110 4111 Collective on MPI_Comm 4112 4113 Input Parameters: 4114 + comm - MPI communicator 4115 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4116 This value should be the same as the local size used in creating the 4117 y vector for the matrix-vector product y = Ax. 4118 . n - This value should be the same as the local size used in creating the 4119 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4120 calculated if N is given) For square matrices n is almost always m. 4121 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4122 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4123 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4124 (same value is used for all local rows) 4125 . d_nnz - array containing the number of nonzeros in the various rows of the 4126 DIAGONAL portion of the local submatrix (possibly different for each row) 4127 or NULL, if d_nz is used to specify the nonzero structure. 4128 The size of this array is equal to the number of local rows, i.e 'm'. 4129 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4130 submatrix (same value is used for all local rows). 4131 - o_nnz - array containing the number of nonzeros in the various rows of the 4132 OFF-DIAGONAL portion of the local submatrix (possibly different for 4133 each row) or NULL, if o_nz is used to specify the nonzero 4134 structure. The size of this array is equal to the number 4135 of local rows, i.e 'm'. 4136 4137 Output Parameter: 4138 . A - the matrix 4139 4140 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4141 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4142 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4143 4144 Notes: 4145 If the *_nnz parameter is given then the *_nz parameter is ignored 4146 4147 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4148 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4149 storage requirements for this matrix. 4150 4151 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4152 processor than it must be used on all processors that share the object for 4153 that argument. 4154 4155 The user MUST specify either the local or global matrix dimensions 4156 (possibly both). 4157 4158 The parallel matrix is partitioned across processors such that the 4159 first m0 rows belong to process 0, the next m1 rows belong to 4160 process 1, the next m2 rows belong to process 2 etc.. where 4161 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4162 values corresponding to [m x N] submatrix. 4163 4164 The columns are logically partitioned with the n0 columns belonging 4165 to 0th partition, the next n1 columns belonging to the next 4166 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4167 4168 The DIAGONAL portion of the local submatrix on any given processor 4169 is the submatrix corresponding to the rows and columns m,n 4170 corresponding to the given processor. i.e diagonal matrix on 4171 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4172 etc. The remaining portion of the local submatrix [m x (N-n)] 4173 constitute the OFF-DIAGONAL portion. The example below better 4174 illustrates this concept. 4175 4176 For a square global matrix we define each processor's diagonal portion 4177 to be its local rows and the corresponding columns (a square submatrix); 4178 each processor's off-diagonal portion encompasses the remainder of the 4179 local matrix (a rectangular submatrix). 4180 4181 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4182 4183 When calling this routine with a single process communicator, a matrix of 4184 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4185 type of communicator, use the construction mechanism: 4186 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4187 4188 By default, this format uses inodes (identical nodes) when possible. 4189 We search for consecutive rows with the same nonzero structure, thereby 4190 reusing matrix information to achieve increased efficiency. 4191 4192 Options Database Keys: 4193 + -mat_no_inode - Do not use inodes 4194 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4195 - -mat_aij_oneindex - Internally use indexing starting at 1 4196 rather than 0. Note that when calling MatSetValues(), 4197 the user still MUST index entries starting at 0! 4198 4199 4200 Example usage: 4201 4202 Consider the following 8x8 matrix with 34 non-zero values, that is 4203 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4204 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4205 as follows: 4206 4207 .vb 4208 1 2 0 | 0 3 0 | 0 4 4209 Proc0 0 5 6 | 7 0 0 | 8 0 4210 9 0 10 | 11 0 0 | 12 0 4211 ------------------------------------- 4212 13 0 14 | 15 16 17 | 0 0 4213 Proc1 0 18 0 | 19 20 21 | 0 0 4214 0 0 0 | 22 23 0 | 24 0 4215 ------------------------------------- 4216 Proc2 25 26 27 | 0 0 28 | 29 0 4217 30 0 0 | 31 32 33 | 0 34 4218 .ve 4219 4220 This can be represented as a collection of submatrices as: 4221 4222 .vb 4223 A B C 4224 D E F 4225 G H I 4226 .ve 4227 4228 Where the submatrices A,B,C are owned by proc0, D,E,F are 4229 owned by proc1, G,H,I are owned by proc2. 4230 4231 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4232 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4233 The 'M','N' parameters are 8,8, and have the same values on all procs. 4234 4235 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4236 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4237 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4238 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4239 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4240 matrix, ans [DF] as another SeqAIJ matrix. 4241 4242 When d_nz, o_nz parameters are specified, d_nz storage elements are 4243 allocated for every row of the local diagonal submatrix, and o_nz 4244 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4245 One way to choose d_nz and o_nz is to use the max nonzerors per local 4246 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4247 In this case, the values of d_nz,o_nz are: 4248 .vb 4249 proc0 : dnz = 2, o_nz = 2 4250 proc1 : dnz = 3, o_nz = 2 4251 proc2 : dnz = 1, o_nz = 4 4252 .ve 4253 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4254 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4255 for proc3. i.e we are using 12+15+10=37 storage locations to store 4256 34 values. 4257 4258 When d_nnz, o_nnz parameters are specified, the storage is specified 4259 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4260 In the above case the values for d_nnz,o_nnz are: 4261 .vb 4262 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4263 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4264 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4265 .ve 4266 Here the space allocated is sum of all the above values i.e 34, and 4267 hence pre-allocation is perfect. 4268 4269 Level: intermediate 4270 4271 .keywords: matrix, aij, compressed row, sparse, parallel 4272 4273 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4274 MPIAIJ, MatCreateMPIAIJWithArrays() 4275 @*/ 4276 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4277 { 4278 PetscErrorCode ierr; 4279 PetscMPIInt size; 4280 4281 PetscFunctionBegin; 4282 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4283 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4284 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4285 if (size > 1) { 4286 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4287 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4288 } else { 4289 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4290 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4291 } 4292 PetscFunctionReturn(0); 4293 } 4294 4295 #undef __FUNCT__ 4296 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4297 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4298 { 4299 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4300 4301 PetscFunctionBegin; 4302 *Ad = a->A; 4303 *Ao = a->B; 4304 *colmap = a->garray; 4305 PetscFunctionReturn(0); 4306 } 4307 4308 #undef __FUNCT__ 4309 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4310 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4311 { 4312 PetscErrorCode ierr; 4313 PetscInt i; 4314 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4315 4316 PetscFunctionBegin; 4317 if (coloring->ctype == IS_COLORING_GLOBAL) { 4318 ISColoringValue *allcolors,*colors; 4319 ISColoring ocoloring; 4320 4321 /* set coloring for diagonal portion */ 4322 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4323 4324 /* set coloring for off-diagonal portion */ 4325 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4326 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4327 for (i=0; i<a->B->cmap->n; i++) { 4328 colors[i] = allcolors[a->garray[i]]; 4329 } 4330 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4331 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4332 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4333 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4334 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4335 ISColoringValue *colors; 4336 PetscInt *larray; 4337 ISColoring ocoloring; 4338 4339 /* set coloring for diagonal portion */ 4340 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4341 for (i=0; i<a->A->cmap->n; i++) { 4342 larray[i] = i + A->cmap->rstart; 4343 } 4344 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4345 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4346 for (i=0; i<a->A->cmap->n; i++) { 4347 colors[i] = coloring->colors[larray[i]]; 4348 } 4349 ierr = PetscFree(larray);CHKERRQ(ierr); 4350 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4351 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4352 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4353 4354 /* set coloring for off-diagonal portion */ 4355 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4356 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4357 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4358 for (i=0; i<a->B->cmap->n; i++) { 4359 colors[i] = coloring->colors[larray[i]]; 4360 } 4361 ierr = PetscFree(larray);CHKERRQ(ierr); 4362 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4363 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4364 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4365 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4366 PetscFunctionReturn(0); 4367 } 4368 4369 #undef __FUNCT__ 4370 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4371 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4372 { 4373 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4374 PetscErrorCode ierr; 4375 4376 PetscFunctionBegin; 4377 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4378 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4379 PetscFunctionReturn(0); 4380 } 4381 4382 #undef __FUNCT__ 4383 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4384 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4385 { 4386 PetscErrorCode ierr; 4387 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4388 PetscInt *indx; 4389 4390 PetscFunctionBegin; 4391 /* This routine will ONLY return MPIAIJ type matrix */ 4392 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4393 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4394 if (n == PETSC_DECIDE) { 4395 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4396 } 4397 /* Check sum(n) = N */ 4398 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4399 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4400 4401 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4402 rstart -= m; 4403 4404 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4405 for (i=0; i<m; i++) { 4406 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4407 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4408 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4409 } 4410 4411 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4412 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4413 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4414 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4415 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4416 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4417 PetscFunctionReturn(0); 4418 } 4419 4420 #undef __FUNCT__ 4421 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4422 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4423 { 4424 PetscErrorCode ierr; 4425 PetscInt m,N,i,rstart,nnz,Ii; 4426 PetscInt *indx; 4427 PetscScalar *values; 4428 4429 PetscFunctionBegin; 4430 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4431 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4432 for (i=0; i<m; i++) { 4433 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4434 Ii = i + rstart; 4435 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4436 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4437 } 4438 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4439 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4440 PetscFunctionReturn(0); 4441 } 4442 4443 #undef __FUNCT__ 4444 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4445 /*@ 4446 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4447 matrices from each processor 4448 4449 Collective on MPI_Comm 4450 4451 Input Parameters: 4452 + comm - the communicators the parallel matrix will live on 4453 . inmat - the input sequential matrices 4454 . n - number of local columns (or PETSC_DECIDE) 4455 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4456 4457 Output Parameter: 4458 . outmat - the parallel matrix generated 4459 4460 Level: advanced 4461 4462 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4463 4464 @*/ 4465 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4466 { 4467 PetscErrorCode ierr; 4468 4469 PetscFunctionBegin; 4470 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4471 if (scall == MAT_INITIAL_MATRIX) { 4472 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4473 } 4474 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4475 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4476 PetscFunctionReturn(0); 4477 } 4478 4479 #undef __FUNCT__ 4480 #define __FUNCT__ "MatFileSplit" 4481 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4482 { 4483 PetscErrorCode ierr; 4484 PetscMPIInt rank; 4485 PetscInt m,N,i,rstart,nnz; 4486 size_t len; 4487 const PetscInt *indx; 4488 PetscViewer out; 4489 char *name; 4490 Mat B; 4491 const PetscScalar *values; 4492 4493 PetscFunctionBegin; 4494 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4495 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4496 /* Should this be the type of the diagonal block of A? */ 4497 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4498 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4499 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4500 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4501 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4502 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4503 for (i=0; i<m; i++) { 4504 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4505 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4506 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4507 } 4508 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4509 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4510 4511 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4512 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4513 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4514 sprintf(name,"%s.%d",outfile,rank); 4515 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4516 ierr = PetscFree(name);CHKERRQ(ierr); 4517 ierr = MatView(B,out);CHKERRQ(ierr); 4518 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4519 ierr = MatDestroy(&B);CHKERRQ(ierr); 4520 PetscFunctionReturn(0); 4521 } 4522 4523 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4524 #undef __FUNCT__ 4525 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4526 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4527 { 4528 PetscErrorCode ierr; 4529 Mat_Merge_SeqsToMPI *merge; 4530 PetscContainer container; 4531 4532 PetscFunctionBegin; 4533 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4534 if (container) { 4535 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4536 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4537 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4538 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4539 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4540 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4541 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4542 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4543 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4544 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4545 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4546 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4547 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4548 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4549 ierr = PetscFree(merge);CHKERRQ(ierr); 4550 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4551 } 4552 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4553 PetscFunctionReturn(0); 4554 } 4555 4556 #include <../src/mat/utils/freespace.h> 4557 #include <petscbt.h> 4558 4559 #undef __FUNCT__ 4560 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4561 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4562 { 4563 PetscErrorCode ierr; 4564 MPI_Comm comm; 4565 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4566 PetscMPIInt size,rank,taga,*len_s; 4567 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4568 PetscInt proc,m; 4569 PetscInt **buf_ri,**buf_rj; 4570 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4571 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4572 MPI_Request *s_waits,*r_waits; 4573 MPI_Status *status; 4574 MatScalar *aa=a->a; 4575 MatScalar **abuf_r,*ba_i; 4576 Mat_Merge_SeqsToMPI *merge; 4577 PetscContainer container; 4578 4579 PetscFunctionBegin; 4580 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4581 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4582 4583 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4584 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4585 4586 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4587 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4588 4589 bi = merge->bi; 4590 bj = merge->bj; 4591 buf_ri = merge->buf_ri; 4592 buf_rj = merge->buf_rj; 4593 4594 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4595 owners = merge->rowmap->range; 4596 len_s = merge->len_s; 4597 4598 /* send and recv matrix values */ 4599 /*-----------------------------*/ 4600 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4601 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4602 4603 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4604 for (proc=0,k=0; proc<size; proc++) { 4605 if (!len_s[proc]) continue; 4606 i = owners[proc]; 4607 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4608 k++; 4609 } 4610 4611 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4612 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4613 ierr = PetscFree(status);CHKERRQ(ierr); 4614 4615 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4616 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4617 4618 /* insert mat values of mpimat */ 4619 /*----------------------------*/ 4620 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4621 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4622 4623 for (k=0; k<merge->nrecv; k++) { 4624 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4625 nrows = *(buf_ri_k[k]); 4626 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4627 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4628 } 4629 4630 /* set values of ba */ 4631 m = merge->rowmap->n; 4632 for (i=0; i<m; i++) { 4633 arow = owners[rank] + i; 4634 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4635 bnzi = bi[i+1] - bi[i]; 4636 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4637 4638 /* add local non-zero vals of this proc's seqmat into ba */ 4639 anzi = ai[arow+1] - ai[arow]; 4640 aj = a->j + ai[arow]; 4641 aa = a->a + ai[arow]; 4642 nextaj = 0; 4643 for (j=0; nextaj<anzi; j++) { 4644 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4645 ba_i[j] += aa[nextaj++]; 4646 } 4647 } 4648 4649 /* add received vals into ba */ 4650 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4651 /* i-th row */ 4652 if (i == *nextrow[k]) { 4653 anzi = *(nextai[k]+1) - *nextai[k]; 4654 aj = buf_rj[k] + *(nextai[k]); 4655 aa = abuf_r[k] + *(nextai[k]); 4656 nextaj = 0; 4657 for (j=0; nextaj<anzi; j++) { 4658 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4659 ba_i[j] += aa[nextaj++]; 4660 } 4661 } 4662 nextrow[k]++; nextai[k]++; 4663 } 4664 } 4665 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4666 } 4667 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4668 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4669 4670 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4671 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4672 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4673 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4674 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4675 PetscFunctionReturn(0); 4676 } 4677 4678 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4679 4680 #undef __FUNCT__ 4681 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4682 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4683 { 4684 PetscErrorCode ierr; 4685 Mat B_mpi; 4686 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4687 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4688 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4689 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4690 PetscInt len,proc,*dnz,*onz,bs,cbs; 4691 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4692 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4693 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4694 MPI_Status *status; 4695 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4696 PetscBT lnkbt; 4697 Mat_Merge_SeqsToMPI *merge; 4698 PetscContainer container; 4699 4700 PetscFunctionBegin; 4701 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4702 4703 /* make sure it is a PETSc comm */ 4704 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4705 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4706 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4707 4708 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4709 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4710 4711 /* determine row ownership */ 4712 /*---------------------------------------------------------*/ 4713 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4714 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4715 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4716 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4717 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4718 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4719 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4720 4721 m = merge->rowmap->n; 4722 owners = merge->rowmap->range; 4723 4724 /* determine the number of messages to send, their lengths */ 4725 /*---------------------------------------------------------*/ 4726 len_s = merge->len_s; 4727 4728 len = 0; /* length of buf_si[] */ 4729 merge->nsend = 0; 4730 for (proc=0; proc<size; proc++) { 4731 len_si[proc] = 0; 4732 if (proc == rank) { 4733 len_s[proc] = 0; 4734 } else { 4735 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4736 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4737 } 4738 if (len_s[proc]) { 4739 merge->nsend++; 4740 nrows = 0; 4741 for (i=owners[proc]; i<owners[proc+1]; i++) { 4742 if (ai[i+1] > ai[i]) nrows++; 4743 } 4744 len_si[proc] = 2*(nrows+1); 4745 len += len_si[proc]; 4746 } 4747 } 4748 4749 /* determine the number and length of messages to receive for ij-structure */ 4750 /*-------------------------------------------------------------------------*/ 4751 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4752 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4753 4754 /* post the Irecv of j-structure */ 4755 /*-------------------------------*/ 4756 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4757 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4758 4759 /* post the Isend of j-structure */ 4760 /*--------------------------------*/ 4761 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4762 4763 for (proc=0, k=0; proc<size; proc++) { 4764 if (!len_s[proc]) continue; 4765 i = owners[proc]; 4766 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4767 k++; 4768 } 4769 4770 /* receives and sends of j-structure are complete */ 4771 /*------------------------------------------------*/ 4772 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4773 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4774 4775 /* send and recv i-structure */ 4776 /*---------------------------*/ 4777 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4778 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4779 4780 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4781 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4782 for (proc=0,k=0; proc<size; proc++) { 4783 if (!len_s[proc]) continue; 4784 /* form outgoing message for i-structure: 4785 buf_si[0]: nrows to be sent 4786 [1:nrows]: row index (global) 4787 [nrows+1:2*nrows+1]: i-structure index 4788 */ 4789 /*-------------------------------------------*/ 4790 nrows = len_si[proc]/2 - 1; 4791 buf_si_i = buf_si + nrows+1; 4792 buf_si[0] = nrows; 4793 buf_si_i[0] = 0; 4794 nrows = 0; 4795 for (i=owners[proc]; i<owners[proc+1]; i++) { 4796 anzi = ai[i+1] - ai[i]; 4797 if (anzi) { 4798 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4799 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4800 nrows++; 4801 } 4802 } 4803 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4804 k++; 4805 buf_si += len_si[proc]; 4806 } 4807 4808 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4809 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4810 4811 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4812 for (i=0; i<merge->nrecv; i++) { 4813 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4814 } 4815 4816 ierr = PetscFree(len_si);CHKERRQ(ierr); 4817 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4818 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4819 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4820 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4821 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4822 ierr = PetscFree(status);CHKERRQ(ierr); 4823 4824 /* compute a local seq matrix in each processor */ 4825 /*----------------------------------------------*/ 4826 /* allocate bi array and free space for accumulating nonzero column info */ 4827 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4828 bi[0] = 0; 4829 4830 /* create and initialize a linked list */ 4831 nlnk = N+1; 4832 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4833 4834 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4835 len = ai[owners[rank+1]] - ai[owners[rank]]; 4836 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4837 4838 current_space = free_space; 4839 4840 /* determine symbolic info for each local row */ 4841 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4842 4843 for (k=0; k<merge->nrecv; k++) { 4844 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4845 nrows = *buf_ri_k[k]; 4846 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4847 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4848 } 4849 4850 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4851 len = 0; 4852 for (i=0; i<m; i++) { 4853 bnzi = 0; 4854 /* add local non-zero cols of this proc's seqmat into lnk */ 4855 arow = owners[rank] + i; 4856 anzi = ai[arow+1] - ai[arow]; 4857 aj = a->j + ai[arow]; 4858 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4859 bnzi += nlnk; 4860 /* add received col data into lnk */ 4861 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4862 if (i == *nextrow[k]) { /* i-th row */ 4863 anzi = *(nextai[k]+1) - *nextai[k]; 4864 aj = buf_rj[k] + *nextai[k]; 4865 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4866 bnzi += nlnk; 4867 nextrow[k]++; nextai[k]++; 4868 } 4869 } 4870 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4871 4872 /* if free space is not available, make more free space */ 4873 if (current_space->local_remaining<bnzi) { 4874 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4875 nspacedouble++; 4876 } 4877 /* copy data into free space, then initialize lnk */ 4878 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4879 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4880 4881 current_space->array += bnzi; 4882 current_space->local_used += bnzi; 4883 current_space->local_remaining -= bnzi; 4884 4885 bi[i+1] = bi[i] + bnzi; 4886 } 4887 4888 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4889 4890 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4891 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4892 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4893 4894 /* create symbolic parallel matrix B_mpi */ 4895 /*---------------------------------------*/ 4896 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4897 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4898 if (n==PETSC_DECIDE) { 4899 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4900 } else { 4901 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4902 } 4903 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4904 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4905 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4906 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4907 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4908 4909 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4910 B_mpi->assembled = PETSC_FALSE; 4911 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4912 merge->bi = bi; 4913 merge->bj = bj; 4914 merge->buf_ri = buf_ri; 4915 merge->buf_rj = buf_rj; 4916 merge->coi = NULL; 4917 merge->coj = NULL; 4918 merge->owners_co = NULL; 4919 4920 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4921 4922 /* attach the supporting struct to B_mpi for reuse */ 4923 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4924 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4925 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4926 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4927 *mpimat = B_mpi; 4928 4929 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4930 PetscFunctionReturn(0); 4931 } 4932 4933 #undef __FUNCT__ 4934 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4935 /*@C 4936 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4937 matrices from each processor 4938 4939 Collective on MPI_Comm 4940 4941 Input Parameters: 4942 + comm - the communicators the parallel matrix will live on 4943 . seqmat - the input sequential matrices 4944 . m - number of local rows (or PETSC_DECIDE) 4945 . n - number of local columns (or PETSC_DECIDE) 4946 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4947 4948 Output Parameter: 4949 . mpimat - the parallel matrix generated 4950 4951 Level: advanced 4952 4953 Notes: 4954 The dimensions of the sequential matrix in each processor MUST be the same. 4955 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4956 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4957 @*/ 4958 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4959 { 4960 PetscErrorCode ierr; 4961 PetscMPIInt size; 4962 4963 PetscFunctionBegin; 4964 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4965 if (size == 1) { 4966 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4967 if (scall == MAT_INITIAL_MATRIX) { 4968 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4969 } else { 4970 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4971 } 4972 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4973 PetscFunctionReturn(0); 4974 } 4975 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4976 if (scall == MAT_INITIAL_MATRIX) { 4977 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4978 } 4979 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4980 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4981 PetscFunctionReturn(0); 4982 } 4983 4984 #undef __FUNCT__ 4985 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4986 /*@ 4987 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4988 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4989 with MatGetSize() 4990 4991 Not Collective 4992 4993 Input Parameters: 4994 + A - the matrix 4995 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4996 4997 Output Parameter: 4998 . A_loc - the local sequential matrix generated 4999 5000 Level: developer 5001 5002 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5003 5004 @*/ 5005 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5006 { 5007 PetscErrorCode ierr; 5008 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5009 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5010 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5011 MatScalar *aa=a->a,*ba=b->a,*cam; 5012 PetscScalar *ca; 5013 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5014 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5015 PetscBool match; 5016 5017 PetscFunctionBegin; 5018 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5019 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5020 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5021 if (scall == MAT_INITIAL_MATRIX) { 5022 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5023 ci[0] = 0; 5024 for (i=0; i<am; i++) { 5025 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5026 } 5027 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5028 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5029 k = 0; 5030 for (i=0; i<am; i++) { 5031 ncols_o = bi[i+1] - bi[i]; 5032 ncols_d = ai[i+1] - ai[i]; 5033 /* off-diagonal portion of A */ 5034 for (jo=0; jo<ncols_o; jo++) { 5035 col = cmap[*bj]; 5036 if (col >= cstart) break; 5037 cj[k] = col; bj++; 5038 ca[k++] = *ba++; 5039 } 5040 /* diagonal portion of A */ 5041 for (j=0; j<ncols_d; j++) { 5042 cj[k] = cstart + *aj++; 5043 ca[k++] = *aa++; 5044 } 5045 /* off-diagonal portion of A */ 5046 for (j=jo; j<ncols_o; j++) { 5047 cj[k] = cmap[*bj++]; 5048 ca[k++] = *ba++; 5049 } 5050 } 5051 /* put together the new matrix */ 5052 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5053 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5054 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5055 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5056 mat->free_a = PETSC_TRUE; 5057 mat->free_ij = PETSC_TRUE; 5058 mat->nonew = 0; 5059 } else if (scall == MAT_REUSE_MATRIX) { 5060 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5061 ci = mat->i; cj = mat->j; cam = mat->a; 5062 for (i=0; i<am; i++) { 5063 /* off-diagonal portion of A */ 5064 ncols_o = bi[i+1] - bi[i]; 5065 for (jo=0; jo<ncols_o; jo++) { 5066 col = cmap[*bj]; 5067 if (col >= cstart) break; 5068 *cam++ = *ba++; bj++; 5069 } 5070 /* diagonal portion of A */ 5071 ncols_d = ai[i+1] - ai[i]; 5072 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5073 /* off-diagonal portion of A */ 5074 for (j=jo; j<ncols_o; j++) { 5075 *cam++ = *ba++; bj++; 5076 } 5077 } 5078 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5079 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5080 PetscFunctionReturn(0); 5081 } 5082 5083 #undef __FUNCT__ 5084 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5085 /*@C 5086 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5087 5088 Not Collective 5089 5090 Input Parameters: 5091 + A - the matrix 5092 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5093 - row, col - index sets of rows and columns to extract (or NULL) 5094 5095 Output Parameter: 5096 . A_loc - the local sequential matrix generated 5097 5098 Level: developer 5099 5100 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5101 5102 @*/ 5103 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5104 { 5105 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5106 PetscErrorCode ierr; 5107 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5108 IS isrowa,iscola; 5109 Mat *aloc; 5110 PetscBool match; 5111 5112 PetscFunctionBegin; 5113 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5114 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5115 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5116 if (!row) { 5117 start = A->rmap->rstart; end = A->rmap->rend; 5118 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5119 } else { 5120 isrowa = *row; 5121 } 5122 if (!col) { 5123 start = A->cmap->rstart; 5124 cmap = a->garray; 5125 nzA = a->A->cmap->n; 5126 nzB = a->B->cmap->n; 5127 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5128 ncols = 0; 5129 for (i=0; i<nzB; i++) { 5130 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5131 else break; 5132 } 5133 imark = i; 5134 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5135 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5136 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5137 } else { 5138 iscola = *col; 5139 } 5140 if (scall != MAT_INITIAL_MATRIX) { 5141 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5142 aloc[0] = *A_loc; 5143 } 5144 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5145 *A_loc = aloc[0]; 5146 ierr = PetscFree(aloc);CHKERRQ(ierr); 5147 if (!row) { 5148 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5149 } 5150 if (!col) { 5151 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5152 } 5153 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5154 PetscFunctionReturn(0); 5155 } 5156 5157 #undef __FUNCT__ 5158 #define __FUNCT__ "MatGetBrowsOfAcols" 5159 /*@C 5160 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5161 5162 Collective on Mat 5163 5164 Input Parameters: 5165 + A,B - the matrices in mpiaij format 5166 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5167 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5168 5169 Output Parameter: 5170 + rowb, colb - index sets of rows and columns of B to extract 5171 - B_seq - the sequential matrix generated 5172 5173 Level: developer 5174 5175 @*/ 5176 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5177 { 5178 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5179 PetscErrorCode ierr; 5180 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5181 IS isrowb,iscolb; 5182 Mat *bseq=NULL; 5183 5184 PetscFunctionBegin; 5185 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5186 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5187 } 5188 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5189 5190 if (scall == MAT_INITIAL_MATRIX) { 5191 start = A->cmap->rstart; 5192 cmap = a->garray; 5193 nzA = a->A->cmap->n; 5194 nzB = a->B->cmap->n; 5195 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5196 ncols = 0; 5197 for (i=0; i<nzB; i++) { /* row < local row index */ 5198 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5199 else break; 5200 } 5201 imark = i; 5202 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5203 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5204 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5205 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5206 } else { 5207 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5208 isrowb = *rowb; iscolb = *colb; 5209 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5210 bseq[0] = *B_seq; 5211 } 5212 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5213 *B_seq = bseq[0]; 5214 ierr = PetscFree(bseq);CHKERRQ(ierr); 5215 if (!rowb) { 5216 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5217 } else { 5218 *rowb = isrowb; 5219 } 5220 if (!colb) { 5221 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5222 } else { 5223 *colb = iscolb; 5224 } 5225 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5226 PetscFunctionReturn(0); 5227 } 5228 5229 #undef __FUNCT__ 5230 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5231 /* 5232 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5233 of the OFF-DIAGONAL portion of local A 5234 5235 Collective on Mat 5236 5237 Input Parameters: 5238 + A,B - the matrices in mpiaij format 5239 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5240 5241 Output Parameter: 5242 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5243 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5244 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5245 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5246 5247 Level: developer 5248 5249 */ 5250 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5251 { 5252 VecScatter_MPI_General *gen_to,*gen_from; 5253 PetscErrorCode ierr; 5254 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5255 Mat_SeqAIJ *b_oth; 5256 VecScatter ctx =a->Mvctx; 5257 MPI_Comm comm; 5258 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5259 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5260 PetscScalar *rvalues,*svalues; 5261 MatScalar *b_otha,*bufa,*bufA; 5262 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5263 MPI_Request *rwaits = NULL,*swaits = NULL; 5264 MPI_Status *sstatus,rstatus; 5265 PetscMPIInt jj; 5266 PetscInt *cols,sbs,rbs; 5267 PetscScalar *vals; 5268 5269 PetscFunctionBegin; 5270 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5271 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5272 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5273 } 5274 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5275 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5276 5277 gen_to = (VecScatter_MPI_General*)ctx->todata; 5278 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5279 rvalues = gen_from->values; /* holds the length of receiving row */ 5280 svalues = gen_to->values; /* holds the length of sending row */ 5281 nrecvs = gen_from->n; 5282 nsends = gen_to->n; 5283 5284 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5285 srow = gen_to->indices; /* local row index to be sent */ 5286 sstarts = gen_to->starts; 5287 sprocs = gen_to->procs; 5288 sstatus = gen_to->sstatus; 5289 sbs = gen_to->bs; 5290 rstarts = gen_from->starts; 5291 rprocs = gen_from->procs; 5292 rbs = gen_from->bs; 5293 5294 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5295 if (scall == MAT_INITIAL_MATRIX) { 5296 /* i-array */ 5297 /*---------*/ 5298 /* post receives */ 5299 for (i=0; i<nrecvs; i++) { 5300 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5301 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5302 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5303 } 5304 5305 /* pack the outgoing message */ 5306 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5307 5308 sstartsj[0] = 0; 5309 rstartsj[0] = 0; 5310 len = 0; /* total length of j or a array to be sent */ 5311 k = 0; 5312 for (i=0; i<nsends; i++) { 5313 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5314 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5315 for (j=0; j<nrows; j++) { 5316 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5317 for (l=0; l<sbs; l++) { 5318 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5319 5320 rowlen[j*sbs+l] = ncols; 5321 5322 len += ncols; 5323 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5324 } 5325 k++; 5326 } 5327 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5328 5329 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5330 } 5331 /* recvs and sends of i-array are completed */ 5332 i = nrecvs; 5333 while (i--) { 5334 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5335 } 5336 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5337 5338 /* allocate buffers for sending j and a arrays */ 5339 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5340 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5341 5342 /* create i-array of B_oth */ 5343 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5344 5345 b_othi[0] = 0; 5346 len = 0; /* total length of j or a array to be received */ 5347 k = 0; 5348 for (i=0; i<nrecvs; i++) { 5349 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5350 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5351 for (j=0; j<nrows; j++) { 5352 b_othi[k+1] = b_othi[k] + rowlen[j]; 5353 len += rowlen[j]; k++; 5354 } 5355 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5356 } 5357 5358 /* allocate space for j and a arrrays of B_oth */ 5359 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5360 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5361 5362 /* j-array */ 5363 /*---------*/ 5364 /* post receives of j-array */ 5365 for (i=0; i<nrecvs; i++) { 5366 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5367 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5368 } 5369 5370 /* pack the outgoing message j-array */ 5371 k = 0; 5372 for (i=0; i<nsends; i++) { 5373 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5374 bufJ = bufj+sstartsj[i]; 5375 for (j=0; j<nrows; j++) { 5376 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5377 for (ll=0; ll<sbs; ll++) { 5378 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5379 for (l=0; l<ncols; l++) { 5380 *bufJ++ = cols[l]; 5381 } 5382 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5383 } 5384 } 5385 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5386 } 5387 5388 /* recvs and sends of j-array are completed */ 5389 i = nrecvs; 5390 while (i--) { 5391 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5392 } 5393 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5394 } else if (scall == MAT_REUSE_MATRIX) { 5395 sstartsj = *startsj_s; 5396 rstartsj = *startsj_r; 5397 bufa = *bufa_ptr; 5398 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5399 b_otha = b_oth->a; 5400 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5401 5402 /* a-array */ 5403 /*---------*/ 5404 /* post receives of a-array */ 5405 for (i=0; i<nrecvs; i++) { 5406 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5407 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5408 } 5409 5410 /* pack the outgoing message a-array */ 5411 k = 0; 5412 for (i=0; i<nsends; i++) { 5413 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5414 bufA = bufa+sstartsj[i]; 5415 for (j=0; j<nrows; j++) { 5416 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5417 for (ll=0; ll<sbs; ll++) { 5418 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5419 for (l=0; l<ncols; l++) { 5420 *bufA++ = vals[l]; 5421 } 5422 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5423 } 5424 } 5425 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5426 } 5427 /* recvs and sends of a-array are completed */ 5428 i = nrecvs; 5429 while (i--) { 5430 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5431 } 5432 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5433 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5434 5435 if (scall == MAT_INITIAL_MATRIX) { 5436 /* put together the new matrix */ 5437 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5438 5439 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5440 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5441 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5442 b_oth->free_a = PETSC_TRUE; 5443 b_oth->free_ij = PETSC_TRUE; 5444 b_oth->nonew = 0; 5445 5446 ierr = PetscFree(bufj);CHKERRQ(ierr); 5447 if (!startsj_s || !bufa_ptr) { 5448 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5449 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5450 } else { 5451 *startsj_s = sstartsj; 5452 *startsj_r = rstartsj; 5453 *bufa_ptr = bufa; 5454 } 5455 } 5456 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5457 PetscFunctionReturn(0); 5458 } 5459 5460 #undef __FUNCT__ 5461 #define __FUNCT__ "MatGetCommunicationStructs" 5462 /*@C 5463 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5464 5465 Not Collective 5466 5467 Input Parameters: 5468 . A - The matrix in mpiaij format 5469 5470 Output Parameter: 5471 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5472 . colmap - A map from global column index to local index into lvec 5473 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5474 5475 Level: developer 5476 5477 @*/ 5478 #if defined(PETSC_USE_CTABLE) 5479 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5480 #else 5481 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5482 #endif 5483 { 5484 Mat_MPIAIJ *a; 5485 5486 PetscFunctionBegin; 5487 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5488 PetscValidPointer(lvec, 2); 5489 PetscValidPointer(colmap, 3); 5490 PetscValidPointer(multScatter, 4); 5491 a = (Mat_MPIAIJ*) A->data; 5492 if (lvec) *lvec = a->lvec; 5493 if (colmap) *colmap = a->colmap; 5494 if (multScatter) *multScatter = a->Mvctx; 5495 PetscFunctionReturn(0); 5496 } 5497 5498 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5499 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5500 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5501 5502 #undef __FUNCT__ 5503 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5504 /* 5505 Computes (B'*A')' since computing B*A directly is untenable 5506 5507 n p p 5508 ( ) ( ) ( ) 5509 m ( A ) * n ( B ) = m ( C ) 5510 ( ) ( ) ( ) 5511 5512 */ 5513 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5514 { 5515 PetscErrorCode ierr; 5516 Mat At,Bt,Ct; 5517 5518 PetscFunctionBegin; 5519 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5520 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5521 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5522 ierr = MatDestroy(&At);CHKERRQ(ierr); 5523 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5524 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5525 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5526 PetscFunctionReturn(0); 5527 } 5528 5529 #undef __FUNCT__ 5530 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5531 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5532 { 5533 PetscErrorCode ierr; 5534 PetscInt m=A->rmap->n,n=B->cmap->n; 5535 Mat Cmat; 5536 5537 PetscFunctionBegin; 5538 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5539 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5540 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5541 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5542 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5543 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5544 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5545 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5546 5547 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5548 5549 *C = Cmat; 5550 PetscFunctionReturn(0); 5551 } 5552 5553 /* ----------------------------------------------------------------*/ 5554 #undef __FUNCT__ 5555 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5556 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5557 { 5558 PetscErrorCode ierr; 5559 5560 PetscFunctionBegin; 5561 if (scall == MAT_INITIAL_MATRIX) { 5562 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5563 } 5564 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5565 PetscFunctionReturn(0); 5566 } 5567 5568 #if defined(PETSC_HAVE_MUMPS) 5569 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5570 #endif 5571 #if defined(PETSC_HAVE_PASTIX) 5572 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5573 #endif 5574 #if defined(PETSC_HAVE_SUPERLU_DIST) 5575 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5576 #endif 5577 #if defined(PETSC_HAVE_CLIQUE) 5578 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5579 #endif 5580 5581 /*MC 5582 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5583 5584 Options Database Keys: 5585 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5586 5587 Level: beginner 5588 5589 .seealso: MatCreateAIJ() 5590 M*/ 5591 5592 #undef __FUNCT__ 5593 #define __FUNCT__ "MatCreate_MPIAIJ" 5594 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5595 { 5596 Mat_MPIAIJ *b; 5597 PetscErrorCode ierr; 5598 PetscMPIInt size; 5599 5600 PetscFunctionBegin; 5601 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5602 5603 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5604 B->data = (void*)b; 5605 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5606 B->assembled = PETSC_FALSE; 5607 B->insertmode = NOT_SET_VALUES; 5608 b->size = size; 5609 5610 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5611 5612 /* build cache for off array entries formed */ 5613 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5614 5615 b->donotstash = PETSC_FALSE; 5616 b->colmap = 0; 5617 b->garray = 0; 5618 b->roworiented = PETSC_TRUE; 5619 5620 /* stuff used for matrix vector multiply */ 5621 b->lvec = NULL; 5622 b->Mvctx = NULL; 5623 5624 /* stuff for MatGetRow() */ 5625 b->rowindices = 0; 5626 b->rowvalues = 0; 5627 b->getrowactive = PETSC_FALSE; 5628 5629 /* flexible pointer used in CUSP/CUSPARSE classes */ 5630 b->spptr = NULL; 5631 5632 #if defined(PETSC_HAVE_MUMPS) 5633 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C","MatGetFactor_aij_mumps",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5634 #endif 5635 #if defined(PETSC_HAVE_PASTIX) 5636 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C","MatGetFactor_mpiaij_pastix",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5637 #endif 5638 #if defined(PETSC_HAVE_SUPERLU_DIST) 5639 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C","MatGetFactor_mpiaij_superlu_dist",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5640 #endif 5641 #if defined(PETSC_HAVE_CLIQUE) 5642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C","MatGetFactor_aij_clique",MatGetFactor_aij_clique);CHKERRQ(ierr); 5643 #endif 5644 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIAIJ",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5645 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIAIJ",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5646 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIAIJ",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5647 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_MPIAIJ",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5648 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C","MatMPIAIJSetPreallocation_MPIAIJ",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5649 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C","MatMPIAIJSetPreallocationCSR_MPIAIJ",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5650 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIAIJ",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5651 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C","MatConvert_MPIAIJ_MPIAIJPERM",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5652 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C","MatConvert_MPIAIJ_MPIAIJCRL",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5653 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C","MatConvert_MPIAIJ_MPISBAIJ",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5654 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C","MatMatMult_MPIDense_MPIAIJ",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5655 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C","MatMatMultSymbolic_MPIDense_MPIAIJ",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5656 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C","MatMatMultNumeric_MPIDense_MPIAIJ",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5657 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5658 PetscFunctionReturn(0); 5659 } 5660 5661 #undef __FUNCT__ 5662 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5663 /*@ 5664 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5665 and "off-diagonal" part of the matrix in CSR format. 5666 5667 Collective on MPI_Comm 5668 5669 Input Parameters: 5670 + comm - MPI communicator 5671 . m - number of local rows (Cannot be PETSC_DECIDE) 5672 . n - This value should be the same as the local size used in creating the 5673 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5674 calculated if N is given) For square matrices n is almost always m. 5675 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5676 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5677 . i - row indices for "diagonal" portion of matrix 5678 . j - column indices 5679 . a - matrix values 5680 . oi - row indices for "off-diagonal" portion of matrix 5681 . oj - column indices 5682 - oa - matrix values 5683 5684 Output Parameter: 5685 . mat - the matrix 5686 5687 Level: advanced 5688 5689 Notes: 5690 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5691 must free the arrays once the matrix has been destroyed and not before. 5692 5693 The i and j indices are 0 based 5694 5695 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5696 5697 This sets local rows and cannot be used to set off-processor values. 5698 5699 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5700 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5701 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5702 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5703 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5704 communication if it is known that only local entries will be set. 5705 5706 .keywords: matrix, aij, compressed row, sparse, parallel 5707 5708 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5709 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5710 @*/ 5711 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5712 { 5713 PetscErrorCode ierr; 5714 Mat_MPIAIJ *maij; 5715 5716 PetscFunctionBegin; 5717 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5718 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5719 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5720 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5721 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5722 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5723 maij = (Mat_MPIAIJ*) (*mat)->data; 5724 5725 (*mat)->preallocated = PETSC_TRUE; 5726 5727 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5728 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5729 5730 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5731 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5732 5733 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5734 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5735 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5736 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5737 5738 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5739 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5740 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5741 PetscFunctionReturn(0); 5742 } 5743 5744 /* 5745 Special version for direct calls from Fortran 5746 */ 5747 #include <petsc-private/fortranimpl.h> 5748 5749 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5750 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5751 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5752 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5753 #endif 5754 5755 /* Change these macros so can be used in void function */ 5756 #undef CHKERRQ 5757 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5758 #undef SETERRQ2 5759 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5760 #undef SETERRQ3 5761 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5762 #undef SETERRQ 5763 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5764 5765 #undef __FUNCT__ 5766 #define __FUNCT__ "matsetvaluesmpiaij_" 5767 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5768 { 5769 Mat mat = *mmat; 5770 PetscInt m = *mm, n = *mn; 5771 InsertMode addv = *maddv; 5772 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5773 PetscScalar value; 5774 PetscErrorCode ierr; 5775 5776 MatCheckPreallocated(mat,1); 5777 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5778 5779 #if defined(PETSC_USE_DEBUG) 5780 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5781 #endif 5782 { 5783 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5784 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5785 PetscBool roworiented = aij->roworiented; 5786 5787 /* Some Variables required in the macro */ 5788 Mat A = aij->A; 5789 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5790 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5791 MatScalar *aa = a->a; 5792 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5793 Mat B = aij->B; 5794 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5795 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5796 MatScalar *ba = b->a; 5797 5798 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5799 PetscInt nonew = a->nonew; 5800 MatScalar *ap1,*ap2; 5801 5802 PetscFunctionBegin; 5803 for (i=0; i<m; i++) { 5804 if (im[i] < 0) continue; 5805 #if defined(PETSC_USE_DEBUG) 5806 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5807 #endif 5808 if (im[i] >= rstart && im[i] < rend) { 5809 row = im[i] - rstart; 5810 lastcol1 = -1; 5811 rp1 = aj + ai[row]; 5812 ap1 = aa + ai[row]; 5813 rmax1 = aimax[row]; 5814 nrow1 = ailen[row]; 5815 low1 = 0; 5816 high1 = nrow1; 5817 lastcol2 = -1; 5818 rp2 = bj + bi[row]; 5819 ap2 = ba + bi[row]; 5820 rmax2 = bimax[row]; 5821 nrow2 = bilen[row]; 5822 low2 = 0; 5823 high2 = nrow2; 5824 5825 for (j=0; j<n; j++) { 5826 if (roworiented) value = v[i*n+j]; 5827 else value = v[i+j*m]; 5828 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5829 if (in[j] >= cstart && in[j] < cend) { 5830 col = in[j] - cstart; 5831 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5832 } else if (in[j] < 0) continue; 5833 #if defined(PETSC_USE_DEBUG) 5834 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5835 #endif 5836 else { 5837 if (mat->was_assembled) { 5838 if (!aij->colmap) { 5839 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5840 } 5841 #if defined(PETSC_USE_CTABLE) 5842 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5843 col--; 5844 #else 5845 col = aij->colmap[in[j]] - 1; 5846 #endif 5847 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5848 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5849 col = in[j]; 5850 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5851 B = aij->B; 5852 b = (Mat_SeqAIJ*)B->data; 5853 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5854 rp2 = bj + bi[row]; 5855 ap2 = ba + bi[row]; 5856 rmax2 = bimax[row]; 5857 nrow2 = bilen[row]; 5858 low2 = 0; 5859 high2 = nrow2; 5860 bm = aij->B->rmap->n; 5861 ba = b->a; 5862 } 5863 } else col = in[j]; 5864 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5865 } 5866 } 5867 } else if (!aij->donotstash) { 5868 if (roworiented) { 5869 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5870 } else { 5871 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5872 } 5873 } 5874 } 5875 } 5876 PetscFunctionReturnVoid(); 5877 } 5878 5879