#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the AIJ format also
   automatically switches over to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
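/*
   Illustrative usage sketch (not compiled as part of this file): the manual pages above recommend
   calling both preallocation routines so that the same user code runs unchanged whether the
   communicator has one process or many. The global size of 100 and the per-row nonzero estimates
   (5 diagonal, 2 off-diagonal) below are assumed example values only, not requirements.

       Mat            A;
       PetscErrorCode ierr;
       PetscInt       i = 0,j = 0;
       PetscScalar    v = 1.0;

       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
       ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
       ierr = MatSetFromOptions(A);CHKERRQ(ierr);
       ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   Only the preallocation call matching the actual matrix type takes effect; the other is ignored,
   which is why calling both is safe. Entries are then inserted with MatSetValues() and the matrix
   is completed with the usual assembly calls:

       ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatDestroy(&A);CHKERRQ(ierr);
*/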
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 CHKMEMQ; 197 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 199 if (!rank) { 200 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 201 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 202 } 203 if (reuse == MAT_INITIAL_MATRIX) { 204 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 205 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 206 if (!rank) { 207 bses[0] = gmat->rmap->bs; 208 bses[1] = gmat->cmap->bs; 209 } 210 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 211 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 212 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 213 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 214 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 215 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 216 217 rowners[0] = 0; 218 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 219 rstart = rowners[rank]; 220 rend = rowners[rank+1]; 221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 222 if (!rank) { 223 gmata = (Mat_SeqAIJ*) gmat->data; 224 /* send row lengths to all processors */ 225 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 226 for (i=1; i<size; i++) { 227 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 228 } 229 /* determine number diagonal and off-diagonal counts */ 230 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 231 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 232 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 233 jj = 0; 234 for (i=0; i<m; i++) { 235 for (j=0; j<dlens[i]; j++) { 236 if (gmata->j[jj] < rstart) ld[i]++; 237 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 238 jj++; 239 } 240 } 241 /* send column indices to other processes */ 242 for (i=1; i<size; i++) { 243 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 244 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 246 } 247 248 /* send numerical values to other processes */ 249 for (i=1; i<size; i++) { 250 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 251 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 252 } 253 gmataa = gmata->a; 254 gmataj = gmata->j; 255 256 } else { 257 /* receive row lengths */ 258 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 /* receive column indices */ 260 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 262 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 263 /* determine number diagonal and off-diagonal counts */ 264 ierr = 
PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 266 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 267 jj = 0; 268 for (i=0; i<m; i++) { 269 for (j=0; j<dlens[i]; j++) { 270 if (gmataj[jj] < rstart) ld[i]++; 271 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 272 jj++; 273 } 274 } 275 /* receive numerical values */ 276 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 277 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 278 } 279 /* set preallocation */ 280 for (i=0; i<m; i++) { 281 dlens[i] -= olens[i]; 282 } 283 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 284 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 285 286 for (i=0; i<m; i++) { 287 dlens[i] += olens[i]; 288 } 289 cnt = 0; 290 for (i=0; i<m; i++) { 291 row = rstart + i; 292 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 293 cnt += dlens[i]; 294 } 295 if (rank) { 296 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 297 } 298 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 299 ierr = PetscFree(rowners);CHKERRQ(ierr); 300 301 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 302 303 *inmat = mat; 304 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 307 mat = *inmat; 308 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 309 if (!rank) { 310 /* send numerical values to other processes */ 311 gmata = (Mat_SeqAIJ*) gmat->data; 312 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 313 gmataa = gmata->a; 314 for (i=1; i<size; i++) { 315 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 316 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 317 } 318 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 319 } else { 320 /* receive numerical values from process 0*/ 321 nz = Ad->nz + Ao->nz; 322 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 326 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 327 ad = Ad->a; 328 ao = Ao->a; 329 if (mat->rmap->n) { 330 i = 0; 331 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 for (i=1; i<mat->rmap->n; i++) { 335 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 336 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 337 } 338 i--; 339 if (mat->rmap->n) { 340 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 341 } 342 if (rank) { 343 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 344 } 345 } 346 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 CHKMEMQ; 349 PetscFunctionReturn(0); 350 } 351 352 /* 353 Local utility routine that creates a mapping from the global column 354 
number to the local number in the off-diagonal part of the local 355 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 356 a slightly higher hash table cost; without it it is not scalable (each processor 357 has an order N integer array but is fast to acess. 358 */ 359 #undef __FUNCT__ 360 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 361 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 362 { 363 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 364 PetscErrorCode ierr; 365 PetscInt n = aij->B->cmap->n,i; 366 367 PetscFunctionBegin; 368 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 369 #if defined(PETSC_USE_CTABLE) 370 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 371 for (i=0; i<n; i++) { 372 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 373 } 374 #else 375 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 376 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 377 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 378 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 379 #endif 380 PetscFunctionReturn(0); 381 } 382 383 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 384 { \ 385 if (col <= lastcol1) low1 = 0; \ 386 else high1 = nrow1; \ 387 lastcol1 = col;\ 388 while (high1-low1 > 5) { \ 389 t = (low1+high1)/2; \ 390 if (rp1[t] > col) high1 = t; \ 391 else low1 = t; \ 392 } \ 393 for (_i=low1; _i<high1; _i++) { \ 394 if (rp1[_i] > col) break; \ 395 if (rp1[_i] == col) { \ 396 if (addv == ADD_VALUES) ap1[_i] += value; \ 397 else ap1[_i] = value; \ 398 goto a_noinsert; \ 399 } \ 400 } \ 401 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 402 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 403 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 404 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 405 N = nrow1++ - 1; a->nz++; high1++; \ 406 /* shift up all the later entries in this row */ \ 407 for (ii=N; ii>=_i; ii--) { \ 408 rp1[ii+1] = rp1[ii]; \ 409 ap1[ii+1] = ap1[ii]; \ 410 } \ 411 rp1[_i] = col; \ 412 ap1[_i] = value; \ 413 a_noinsert: ; \ 414 ailen[row] = nrow1; \ 415 } 416 417 418 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 419 { \ 420 if (col <= lastcol2) low2 = 0; \ 421 else high2 = nrow2; \ 422 lastcol2 = col; \ 423 while (high2-low2 > 5) { \ 424 t = (low2+high2)/2; \ 425 if (rp2[t] > col) high2 = t; \ 426 else low2 = t; \ 427 } \ 428 for (_i=low2; _i<high2; _i++) { \ 429 if (rp2[_i] > col) break; \ 430 if (rp2[_i] == col) { \ 431 if (addv == ADD_VALUES) ap2[_i] += value; \ 432 else ap2[_i] = value; \ 433 goto b_noinsert; \ 434 } \ 435 } \ 436 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 437 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 438 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 439 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 440 N = nrow2++ - 1; b->nz++; high2++; \ 441 /* shift up all the later entries in this row */ \ 442 for (ii=N; ii>=_i; ii--) { \ 443 rp2[ii+1] = rp2[ii]; \ 444 ap2[ii+1] = ap2[ii]; \ 445 } \ 446 rp2[_i] = col; \ 447 ap2[_i] = value; 
\ 448 b_noinsert: ; \ 449 bilen[row] = nrow2; \ 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 454 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 455 { 456 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 457 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 458 PetscErrorCode ierr; 459 PetscInt l,*garray = mat->garray,diag; 460 461 PetscFunctionBegin; 462 /* code only works for square matrices A */ 463 464 /* find size of row to the left of the diagonal part */ 465 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 466 row = row - diag; 467 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 468 if (garray[b->j[b->i[row]+l]] > diag) break; 469 } 470 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 471 472 /* diagonal part */ 473 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 474 475 /* right of diagonal part */ 476 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 477 PetscFunctionReturn(0); 478 } 479 480 #undef __FUNCT__ 481 #define __FUNCT__ "MatSetValues_MPIAIJ" 482 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 483 { 484 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 485 PetscScalar value; 486 PetscErrorCode ierr; 487 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 488 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 489 PetscBool roworiented = aij->roworiented; 490 491 /* Some Variables required in the macro */ 492 Mat A = aij->A; 493 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 494 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 495 MatScalar *aa = a->a; 496 PetscBool ignorezeroentries = a->ignorezeroentries; 497 Mat B = aij->B; 498 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 499 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 500 MatScalar *ba = b->a; 501 502 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 503 PetscInt nonew; 504 MatScalar *ap1,*ap2; 505 506 PetscFunctionBegin; 507 if (v) PetscValidScalarPointer(v,6); 508 for (i=0; i<m; i++) { 509 if (im[i] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 512 #endif 513 if (im[i] >= rstart && im[i] < rend) { 514 row = im[i] - rstart; 515 lastcol1 = -1; 516 rp1 = aj + ai[row]; 517 ap1 = aa + ai[row]; 518 rmax1 = aimax[row]; 519 nrow1 = ailen[row]; 520 low1 = 0; 521 high1 = nrow1; 522 lastcol2 = -1; 523 rp2 = bj + bi[row]; 524 ap2 = ba + bi[row]; 525 rmax2 = bimax[row]; 526 nrow2 = bilen[row]; 527 low2 = 0; 528 high2 = nrow2; 529 530 for (j=0; j<n; j++) { 531 if (v) { 532 if (roworiented) value = v[i*n+j]; 533 else value = v[i+j*m]; 534 } else value = 0.0; 535 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 536 if (in[j] >= cstart && in[j] < cend) { 537 col = in[j] - cstart; 538 nonew = a->nonew; 539 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 540 } else if (in[j] < 0) continue; 541 #if defined(PETSC_USE_DEBUG) 542 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 543 #endif 544 else { 545 if (mat->was_assembled) { 546 if 
(!aij->colmap) { 547 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 548 } 549 #if defined(PETSC_USE_CTABLE) 550 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 551 col--; 552 #else 553 col = aij->colmap[in[j]] - 1; 554 #endif 555 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 556 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 557 col = in[j]; 558 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 559 B = aij->B; 560 b = (Mat_SeqAIJ*)B->data; 561 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 562 rp2 = bj + bi[row]; 563 ap2 = ba + bi[row]; 564 rmax2 = bimax[row]; 565 nrow2 = bilen[row]; 566 low2 = 0; 567 high2 = nrow2; 568 bm = aij->B->rmap->n; 569 ba = b->a; 570 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 571 } else col = in[j]; 572 nonew = b->nonew; 573 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 574 } 575 } 576 } else { 577 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 578 if (!aij->donotstash) { 579 mat->assembled = PETSC_FALSE; 580 if (roworiented) { 581 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 582 } else { 583 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 584 } 585 } 586 } 587 } 588 PetscFunctionReturn(0); 589 } 590 591 #undef __FUNCT__ 592 #define __FUNCT__ "MatGetValues_MPIAIJ" 593 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 594 { 595 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 596 PetscErrorCode ierr; 597 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 598 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 599 600 PetscFunctionBegin; 601 for (i=0; i<m; i++) { 602 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 603 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 604 if (idxm[i] >= rstart && idxm[i] < rend) { 605 row = idxm[i] - rstart; 606 for (j=0; j<n; j++) { 607 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 608 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 609 if (idxn[j] >= cstart && idxn[j] < cend) { 610 col = idxn[j] - cstart; 611 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 612 } else { 613 if (!aij->colmap) { 614 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 615 } 616 #if defined(PETSC_USE_CTABLE) 617 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 618 col--; 619 #else 620 col = aij->colmap[idxn[j]] - 1; 621 #endif 622 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 623 else { 624 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 625 } 626 } 627 } 628 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 629 } 630 PetscFunctionReturn(0); 631 } 632 633 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 634 635 #undef __FUNCT__ 636 #define __FUNCT__ 
"MatAssemblyBegin_MPIAIJ" 637 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 638 { 639 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 640 PetscErrorCode ierr; 641 PetscInt nstash,reallocs; 642 InsertMode addv; 643 644 PetscFunctionBegin; 645 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 646 647 /* make sure all processors are either in INSERTMODE or ADDMODE */ 648 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 649 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 650 mat->insertmode = addv; /* in case this processor had no cache */ 651 652 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 653 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 654 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 655 PetscFunctionReturn(0); 656 } 657 658 #undef __FUNCT__ 659 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 660 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 661 { 662 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 663 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 664 PetscErrorCode ierr; 665 PetscMPIInt n; 666 PetscInt i,j,rstart,ncols,flg; 667 PetscInt *row,*col; 668 PetscBool other_disassembled; 669 PetscScalar *val; 670 InsertMode addv = mat->insertmode; 671 672 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 673 674 PetscFunctionBegin; 675 if (!aij->donotstash && !mat->nooffprocentries) { 676 while (1) { 677 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 678 if (!flg) break; 679 680 for (i=0; i<n; ) { 681 /* Now identify the consecutive vals belonging to the same row */ 682 for (j=i,rstart=row[j]; j<n; j++) { 683 if (row[j] != rstart) break; 684 } 685 if (j < n) ncols = j-i; 686 else ncols = n-i; 687 /* Now assemble all these values with a single function call */ 688 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 689 690 i = j; 691 } 692 } 693 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 694 } 695 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 696 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 697 698 /* determine if any processor has disassembled, if so we must 699 also disassemble ourselfs, in order that we may reassemble. 
*/ 700 /* 701 if nonzero structure of submatrix B cannot change then we know that 702 no processor disassembled thus we can skip this stuff 703 */ 704 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 705 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 706 if (mat->was_assembled && !other_disassembled) { 707 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 708 } 709 } 710 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 711 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 712 } 713 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 714 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 715 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 716 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 717 718 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 719 720 aij->rowvalues = 0; 721 722 /* used by MatAXPY() */ 723 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 724 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 725 726 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 727 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 733 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 734 { 735 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 740 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatZeroRows_MPIAIJ" 746 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 747 { 748 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 749 PetscErrorCode ierr; 750 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 751 PetscInt i,*owners = A->rmap->range; 752 PetscInt *nprocs,j,idx,nsends,row; 753 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 754 PetscInt *rvalues,count,base,slen,*source; 755 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 756 MPI_Comm comm; 757 MPI_Request *send_waits,*recv_waits; 758 MPI_Status recv_status,*send_status; 759 const PetscScalar *xx; 760 PetscScalar *bb; 761 #if defined(PETSC_DEBUG) 762 PetscBool found = PETSC_FALSE; 763 #endif 764 765 PetscFunctionBegin; 766 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 767 /* first count number of contributors to each processor */ 768 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 769 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 770 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 771 j = 0; 772 for (i=0; i<N; i++) { 773 if (lastidx > (idx = rows[i])) j = 0; 774 lastidx = idx; 775 for (; j<size; j++) { 776 if (idx >= owners[j] && idx < owners[j+1]) { 777 nprocs[2*j]++; 778 nprocs[2*j+1] = 1; 779 owner[i] = j; 780 #if defined(PETSC_DEBUG) 781 found = PETSC_TRUE; 782 #endif 783 break; 784 } 785 } 786 #if defined(PETSC_DEBUG) 787 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 788 found = PETSC_FALSE; 789 #endif 790 } 791 nsends = 0; 792 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 793 794 if (A->nooffproczerorows) { 795 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off 
process zero row"); 796 nrecvs = nsends; 797 nmax = N; 798 } else { 799 /* inform other processors of number of messages and max length*/ 800 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 801 } 802 803 /* post receives: */ 804 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 805 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 806 for (i=0; i<nrecvs; i++) { 807 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 808 } 809 810 /* do sends: 811 1) starts[i] gives the starting index in svalues for stuff going to 812 the ith processor 813 */ 814 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 815 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 816 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 817 818 starts[0] = 0; 819 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 820 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 821 822 starts[0] = 0; 823 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 824 count = 0; 825 for (i=0; i<size; i++) { 826 if (nprocs[2*i+1]) { 827 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 828 } 829 } 830 ierr = PetscFree(starts);CHKERRQ(ierr); 831 832 base = owners[rank]; 833 834 /* wait on receives */ 835 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 836 count = nrecvs; slen = 0; 837 while (count) { 838 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 839 /* unpack receives into our local space */ 840 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 841 842 source[imdex] = recv_status.MPI_SOURCE; 843 lens[imdex] = n; 844 slen += n; 845 count--; 846 } 847 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 848 849 /* move the data into the send scatter */ 850 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 851 count = 0; 852 for (i=0; i<nrecvs; i++) { 853 values = rvalues + i*nmax; 854 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 855 } 856 ierr = PetscFree(rvalues);CHKERRQ(ierr); 857 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 858 ierr = PetscFree(owner);CHKERRQ(ierr); 859 ierr = PetscFree(nprocs);CHKERRQ(ierr); 860 861 /* fix right hand side if needed */ 862 if (x && b) { 863 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 864 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 865 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 866 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 867 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 868 } 869 /* 870 Zero the required rows. If the "diagonal block" of the matrix 871 is square and the user wishes to set the diagonal we use separate 872 code so that MatSetValues() is not called for each diagonal allocating 873 new memory, thus calling lots of mallocs and slowing things down. 
874 875 */ 876 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 877 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 878 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 879 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 880 } else if (diag != 0.0) { 881 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 882 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 883 for (i = 0; i < slen; i++) { 884 row = lrows[i] + rstart; 885 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 886 } 887 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 889 } else { 890 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 891 } 892 ierr = PetscFree(lrows);CHKERRQ(ierr); 893 894 /* wait on sends */ 895 if (nsends) { 896 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 897 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 898 ierr = PetscFree(send_status);CHKERRQ(ierr); 899 } 900 ierr = PetscFree(send_waits);CHKERRQ(ierr); 901 ierr = PetscFree(svalues);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } 904 905 #undef __FUNCT__ 906 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 907 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 908 { 909 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 910 PetscErrorCode ierr; 911 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 912 PetscInt i,*owners = A->rmap->range; 913 PetscInt *nprocs,j,idx,nsends; 914 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 915 PetscInt *rvalues,count,base,slen,*source; 916 PetscInt *lens,*lrows,*values,m; 917 MPI_Comm comm; 918 MPI_Request *send_waits,*recv_waits; 919 MPI_Status recv_status,*send_status; 920 const PetscScalar *xx; 921 PetscScalar *bb,*mask; 922 Vec xmask,lmask; 923 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 924 const PetscInt *aj, *ii,*ridx; 925 PetscScalar *aa; 926 #if defined(PETSC_DEBUG) 927 PetscBool found = PETSC_FALSE; 928 #endif 929 930 PetscFunctionBegin; 931 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 932 /* first count number of contributors to each processor */ 933 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 934 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 935 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 936 j = 0; 937 for (i=0; i<N; i++) { 938 if (lastidx > (idx = rows[i])) j = 0; 939 lastidx = idx; 940 for (; j<size; j++) { 941 if (idx >= owners[j] && idx < owners[j+1]) { 942 nprocs[2*j]++; 943 nprocs[2*j+1] = 1; 944 owner[i] = j; 945 #if defined(PETSC_DEBUG) 946 found = PETSC_TRUE; 947 #endif 948 break; 949 } 950 } 951 #if defined(PETSC_DEBUG) 952 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 953 found = PETSC_FALSE; 954 #endif 955 } 956 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 957 958 /* inform other processors of number of messages and max length*/ 959 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 960 961 /* post receives: */ 962 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 963 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 964 for (i=0; i<nrecvs; i++) { 965 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 966 } 967 968 /* do sends: 969 1) starts[i] gives the starting index in svalues for stuff going to 970 the ith processor 971 */ 972 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 973 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 974 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 975 976 starts[0] = 0; 977 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 978 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 979 980 starts[0] = 0; 981 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 982 count = 0; 983 for (i=0; i<size; i++) { 984 if (nprocs[2*i+1]) { 985 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 986 } 987 } 988 ierr = PetscFree(starts);CHKERRQ(ierr); 989 990 base = owners[rank]; 991 992 /* wait on receives */ 993 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 994 count = nrecvs; slen = 0; 995 while (count) { 996 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 997 /* unpack receives into our local space */ 998 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 999 1000 source[imdex] = recv_status.MPI_SOURCE; 1001 lens[imdex] = n; 1002 slen += n; 1003 count--; 1004 } 1005 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1006 1007 /* move the data into the send scatter */ 1008 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1009 count = 0; 1010 for (i=0; i<nrecvs; i++) { 1011 values = rvalues + i*nmax; 1012 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1013 } 1014 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1015 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1016 ierr = PetscFree(owner);CHKERRQ(ierr); 1017 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1018 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1019 1020 /* zero diagonal part of matrix */ 1021 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1022 1023 /* handle off diagonal part of matrix */ 1024 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1025 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1026 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1027 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1028 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1029 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1031 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1032 if (x) { 1033 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1035 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1036 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1037 } 1038 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1039 1040 /* remove zeroed rows of off diagonal matrix */ 1041 ii = aij->i; 1042 for (i=0; i<slen; i++) { 1043 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1044 } 1045 1046 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1047 if (aij->compressedrow.use) { 1048 m = aij->compressedrow.nrows; 1049 ii = 
aij->compressedrow.i; 1050 ridx = aij->compressedrow.rindex; 1051 for (i=0; i<m; i++) { 1052 n = ii[i+1] - ii[i]; 1053 aj = aij->j + ii[i]; 1054 aa = aij->a + ii[i]; 1055 1056 for (j=0; j<n; j++) { 1057 if (PetscAbsScalar(mask[*aj])) { 1058 if (b) bb[*ridx] -= *aa*xx[*aj]; 1059 *aa = 0.0; 1060 } 1061 aa++; 1062 aj++; 1063 } 1064 ridx++; 1065 } 1066 } else { /* do not use compressed row format */ 1067 m = l->B->rmap->n; 1068 for (i=0; i<m; i++) { 1069 n = ii[i+1] - ii[i]; 1070 aj = aij->j + ii[i]; 1071 aa = aij->a + ii[i]; 1072 for (j=0; j<n; j++) { 1073 if (PetscAbsScalar(mask[*aj])) { 1074 if (b) bb[i] -= *aa*xx[*aj]; 1075 *aa = 0.0; 1076 } 1077 aa++; 1078 aj++; 1079 } 1080 } 1081 } 1082 if (x) { 1083 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1084 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1085 } 1086 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1087 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1088 ierr = PetscFree(lrows);CHKERRQ(ierr); 1089 1090 /* wait on sends */ 1091 if (nsends) { 1092 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1093 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1094 ierr = PetscFree(send_status);CHKERRQ(ierr); 1095 } 1096 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1097 ierr = PetscFree(svalues);CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatMult_MPIAIJ" 1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1104 { 1105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 PetscInt nt; 1108 1109 PetscFunctionBegin; 1110 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1111 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1112 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1113 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1114 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1115 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1121 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1122 { 1123 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 #undef __FUNCT__ 1132 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1133 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1134 { 1135 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1136 PetscErrorCode ierr; 1137 1138 PetscFunctionBegin; 1139 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1141 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1148 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1149 { 1150 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1151 PetscErrorCode ierr; 1152 PetscBool merged; 1153 1154 PetscFunctionBegin; 1155 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1156 /* do nondiagonal part */ 1157 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1158 if (!merged) { 1159 /* send 
it on its way */ 1160 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1161 /* do local part */ 1162 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1163 /* receive remote parts: note this assumes the values are not actually */ 1164 /* added in yy until the next line, */ 1165 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1166 } else { 1167 /* do local part */ 1168 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1169 /* send it on its way */ 1170 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1171 /* values actually were received in the Begin() but we need to call this nop */ 1172 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1173 } 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1179 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1180 { 1181 MPI_Comm comm; 1182 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1183 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1184 IS Me,Notme; 1185 PetscErrorCode ierr; 1186 PetscInt M,N,first,last,*notme,i; 1187 PetscMPIInt size; 1188 1189 PetscFunctionBegin; 1190 /* Easy test: symmetric diagonal block */ 1191 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1192 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1193 if (!*f) PetscFunctionReturn(0); 1194 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1195 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1196 if (size == 1) PetscFunctionReturn(0); 1197 1198 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1199 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1200 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1201 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1202 for (i=0; i<first; i++) notme[i] = i; 1203 for (i=last; i<M; i++) notme[i-last+first] = i; 1204 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1205 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1206 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1207 Aoff = Aoffs[0]; 1208 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1209 Boff = Boffs[0]; 1210 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1211 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1212 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1213 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1214 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1215 ierr = PetscFree(notme);CHKERRQ(ierr); 1216 PetscFunctionReturn(0); 1217 } 1218 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1221 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1222 { 1223 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1224 PetscErrorCode ierr; 1225 1226 PetscFunctionBegin; 1227 /* do nondiagonal part */ 1228 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1229 /* send it on its way */ 1230 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1231 /* do local part */ 1232 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1233 /* receive remote parts */ 1234 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 /* 1239 This only works correctly for 
square matrices where the subblock A->A is the 1240 diagonal block 1241 */ 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1244 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1245 { 1246 PetscErrorCode ierr; 1247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1248 1249 PetscFunctionBegin; 1250 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1251 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1252 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "MatScale_MPIAIJ" 1258 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1259 { 1260 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1261 PetscErrorCode ierr; 1262 1263 PetscFunctionBegin; 1264 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1265 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatDestroy_MPIAIJ" 1271 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1272 { 1273 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1274 PetscErrorCode ierr; 1275 1276 PetscFunctionBegin; 1277 #if defined(PETSC_USE_LOG) 1278 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1279 #endif 1280 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1281 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1282 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1283 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1284 #if defined(PETSC_USE_CTABLE) 1285 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1286 #else 1287 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1288 #endif 1289 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1290 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1291 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1292 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1293 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1294 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1295 1296 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",NULL);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1311 { 1312 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1313 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1314 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1315 PetscErrorCode ierr; 1316 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1317 int fd; 1318 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1319 PetscInt 
nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* this processor needs as much space as the largest number of nonzeros on any processor */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr =
PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1389 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1390 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1391 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1392 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1393 } 1394 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1395 } else { 1396 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1397 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1398 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1399 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1400 } 1401 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1402 1403 /* load up the local column values */ 1404 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1405 cnt = 0; 1406 for (i=0; i<mat->rmap->n; i++) { 1407 for (j=B->i[i]; j<B->i[i+1]; j++) { 1408 if (garray[B->j[j]] > cstart) break; 1409 column_values[cnt++] = B->a[j]; 1410 } 1411 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1412 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1413 } 1414 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1415 1416 /* store the column values to the file */ 1417 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1418 if (!rank) { 1419 MPI_Status status; 1420 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1421 for (i=1; i<size; i++) { 1422 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1423 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1424 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1425 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1426 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1427 } 1428 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1429 } else { 1430 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1431 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1432 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1433 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1434 } 1435 ierr = PetscFree(column_values);CHKERRQ(ierr); 1436 1437 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1438 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1439 PetscFunctionReturn(0); 1440 } 1441 1442 #include <petscdraw.h> 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1445 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1446 { 1447 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1448 PetscErrorCode ierr; 1449 PetscMPIInt rank = aij->rank,size = aij->size; 1450 PetscBool isdraw,iascii,isbinary; 1451 
PetscViewer sviewer; 1452 PetscViewerFormat format; 1453 1454 PetscFunctionBegin; 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1457 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1458 if (iascii) { 1459 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1460 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1461 MatInfo info; 1462 PetscBool inodes; 1463 1464 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1465 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1466 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1468 if (!inodes) { 1469 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1470 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1471 } else { 1472 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1473 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1474 } 1475 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1476 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1477 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1478 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1479 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1481 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1482 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1483 PetscFunctionReturn(0); 1484 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1485 PetscInt inodecount,inodelimit,*inodes; 1486 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1487 if (inodes) { 1488 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1489 } else { 1490 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1491 } 1492 PetscFunctionReturn(0); 1493 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1494 PetscFunctionReturn(0); 1495 } 1496 } else if (isbinary) { 1497 if (size == 1) { 1498 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1499 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1500 } else { 1501 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1502 } 1503 PetscFunctionReturn(0); 1504 } else if (isdraw) { 1505 PetscDraw draw; 1506 PetscBool isnull; 1507 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1508 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1509 } 1510 1511 if (size == 1) { 1512 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1513 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1514 } else { 1515 /* assemble the entire matrix onto first processor. 
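       Every process inserts its rows (with global column indices) into a temporary
       MATMPIAIJ whose rows are all owned by process 0, and process 0 then views the
       sequential diagonal block of that matrix. This costs O(nnz) memory on process 0,
       which is why ASCII output of matrices with more than 1024 rows is refused below
       unless -mat_ascii_output_large is given.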
*/ 1516 Mat A; 1517 Mat_SeqAIJ *Aloc; 1518 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1519 MatScalar *a; 1520 1521 if (mat->rmap->N > 1024) { 1522 PetscBool flg = PETSC_FALSE; 1523 1524 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1525 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1526 } 1527 1528 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1529 if (!rank) { 1530 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1531 } else { 1532 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1533 } 1534 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1535 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1536 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1537 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1538 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1539 1540 /* copy over the A part */ 1541 Aloc = (Mat_SeqAIJ*)aij->A->data; 1542 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1543 row = mat->rmap->rstart; 1544 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1545 for (i=0; i<m; i++) { 1546 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1547 row++; 1548 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1549 } 1550 aj = Aloc->j; 1551 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1552 1553 /* copy over the B part */ 1554 Aloc = (Mat_SeqAIJ*)aij->B->data; 1555 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1556 row = mat->rmap->rstart; 1557 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1558 ct = cols; 1559 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1560 for (i=0; i<m; i++) { 1561 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1562 row++; 1563 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1564 } 1565 ierr = PetscFree(ct);CHKERRQ(ierr); 1566 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1568 /* 1569 Everyone has to call to draw the matrix since the graphics waits are 1570 synchronized across all processors that share the PetscDraw object 1571 */ 1572 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1573 if (!rank) { 1574 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1575 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1576 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1577 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1578 } 1579 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1580 ierr = MatDestroy(&A);CHKERRQ(ierr); 1581 } 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatView_MPIAIJ" 1587 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1588 { 1589 PetscErrorCode ierr; 1590 PetscBool iascii,isdraw,issocket,isbinary; 1591 1592 PetscFunctionBegin; 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1594 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1596 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1597 if (iascii || isdraw || isbinary || issocket) { 1598 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1599 } 1600 PetscFunctionReturn(0); 1601 } 1602 1603 #undef __FUNCT__ 1604 #define __FUNCT__ "MatSOR_MPIAIJ" 1605 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1606 { 1607 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1608 PetscErrorCode ierr; 1609 Vec bb1 = 0; 1610 PetscBool hasop; 1611 1612 PetscFunctionBegin; 1613 if (flag == SOR_APPLY_UPPER) { 1614 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1615 PetscFunctionReturn(0); 1616 } 1617 1618 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1619 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1620 } 1621 1622 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1623 if (flag & SOR_ZERO_INITIAL_GUESS) { 1624 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1625 its--; 1626 } 1627 1628 while (its--) { 1629 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1631 1632 /* update rhs: bb1 = bb - B*x */ 1633 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1634 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1635 1636 /* local sweep */ 1637 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1638 } 1639 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1640 if (flag & SOR_ZERO_INITIAL_GUESS) { 1641 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1642 its--; 1643 } 1644 while (its--) { 1645 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 1648 /* update rhs: bb1 = bb - B*x */ 1649 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1650 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1651 1652 /* local sweep */ 1653 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1654 } 1655 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1656 if (flag & SOR_ZERO_INITIAL_GUESS) { 1657 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1658 its--; 1659 } 1660 while (its--) { 1661 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 1664 /* update rhs: bb1 = bb - B*x */ 1665 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1666 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1667 1668 /* local sweep */ 1669 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1670 } 1671 } else if (flag & SOR_EISENSTAT) { 1672 Vec xx1; 1673 1674 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1675 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1676 
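  /* Eisenstat's trick: the local backward sweep above leaves a provisional xx. Below, xx is
     scattered to the ghost vector, bb1 = bb + ((omega-2)/omega)*D*xx + B*lvec is assembled
     (using MatMultDiagonalBlock() when available, otherwise the cached diagonal), a local
     forward sweep is run into xx1, and the result is accumulated with xx += xx1. */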
1677 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1679 if (!mat->diag) { 1680 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1681 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1682 } 1683 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1684 if (hasop) { 1685 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1686 } else { 1687 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1688 } 1689 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1690 1691 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1692 1693 /* local sweep */ 1694 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1695 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1696 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1697 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1698 1699 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1700 PetscFunctionReturn(0); 1701 } 1702 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatPermute_MPIAIJ" 1705 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1706 { 1707 Mat aA,aB,Aperm; 1708 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1709 PetscScalar *aa,*ba; 1710 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1711 PetscSF rowsf,sf; 1712 IS parcolp = NULL; 1713 PetscBool done; 1714 PetscErrorCode ierr; 1715 1716 PetscFunctionBegin; 1717 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1718 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1719 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1720 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1721 1722 /* Invert row permutation to find out where my rows should go */ 1723 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1724 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1725 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1726 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1727 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1728 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1729 1730 /* Invert column permutation to find out where my columns should go */ 1731 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1732 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1733 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1734 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1735 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1737 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1738 1739 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1740 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1741 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1742 1743 /* Find out where my gcols should go */ 1744 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1745 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1746 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1747 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1748 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1749 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1751 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1752 1753 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1754 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1757 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1758 for (i=0; i<m; i++) { 1759 PetscInt row = rdest[i],rowner; 1760 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1761 for (j=ai[i]; j<ai[i+1]; j++) { 1762 PetscInt cowner,col = cdest[aj[j]]; 1763 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1764 if (rowner == cowner) dnnz[i]++; 1765 else onnz[i]++; 1766 } 1767 for (j=bi[i]; j<bi[i+1]; j++) { 1768 PetscInt cowner,col = gcdest[bj[j]]; 1769 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1770 if (rowner == cowner) dnnz[i]++; 1771 else onnz[i]++; 1772 } 1773 } 1774 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1776 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1780 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1781 1782 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1784 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1785 for (i=0; i<m; i++) { 1786 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1787 PetscInt rowlen; 1788 rowlen = ai[i+1] - ai[i]; 1789 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1790 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1791 rowlen = bi[i+1] - bi[i]; 1792 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1793 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1794 } 1795 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1798 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1800 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1801 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1802 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1803 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1804 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1805 *B = Aperm; 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1811 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1812 { 1813 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1814 Mat A = mat->A,B = mat->B; 1815 PetscErrorCode ierr; 1816 PetscReal isend[5],irecv[5]; 1817 1818 PetscFunctionBegin; 1819 
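  /* The statistics of the diagonal (A) and off-diagonal (B) blocks are summed below and then
     combined across the communicator according to flag: MAT_LOCAL reports this process only,
     while MAT_GLOBAL_MAX and MAT_GLOBAL_SUM reduce the five numbers with MPIU_MAX and
     MPIU_SUM respectively. A minimal caller-side sketch (hypothetical names):

        MatInfo info;
        ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
        ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"nz used %g\n",info.nz_used);CHKERRQ(ierr);
  */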
info->block_size = 1.0; 1820 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1821 1822 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1823 isend[3] = info->memory; isend[4] = info->mallocs; 1824 1825 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1826 1827 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1828 isend[3] += info->memory; isend[4] += info->mallocs; 1829 if (flag == MAT_LOCAL) { 1830 info->nz_used = isend[0]; 1831 info->nz_allocated = isend[1]; 1832 info->nz_unneeded = isend[2]; 1833 info->memory = isend[3]; 1834 info->mallocs = isend[4]; 1835 } else if (flag == MAT_GLOBAL_MAX) { 1836 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1837 1838 info->nz_used = irecv[0]; 1839 info->nz_allocated = irecv[1]; 1840 info->nz_unneeded = irecv[2]; 1841 info->memory = irecv[3]; 1842 info->mallocs = irecv[4]; 1843 } else if (flag == MAT_GLOBAL_SUM) { 1844 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1845 1846 info->nz_used = irecv[0]; 1847 info->nz_allocated = irecv[1]; 1848 info->nz_unneeded = irecv[2]; 1849 info->memory = irecv[3]; 1850 info->mallocs = irecv[4]; 1851 } 1852 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1853 info->fill_ratio_needed = 0; 1854 info->factor_mallocs = 0; 1855 PetscFunctionReturn(0); 1856 } 1857 1858 #undef __FUNCT__ 1859 #define __FUNCT__ "MatSetOption_MPIAIJ" 1860 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1861 { 1862 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1863 PetscErrorCode ierr; 1864 1865 PetscFunctionBegin; 1866 switch (op) { 1867 case MAT_NEW_NONZERO_LOCATIONS: 1868 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1869 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1870 case MAT_KEEP_NONZERO_PATTERN: 1871 case MAT_NEW_NONZERO_LOCATION_ERR: 1872 case MAT_USE_INODES: 1873 case MAT_IGNORE_ZERO_ENTRIES: 1874 MatCheckPreallocated(A,1); 1875 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1876 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1877 break; 1878 case MAT_ROW_ORIENTED: 1879 a->roworiented = flg; 1880 1881 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1882 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1883 break; 1884 case MAT_NEW_DIAGONALS: 1885 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1886 break; 1887 case MAT_IGNORE_OFF_PROC_ENTRIES: 1888 a->donotstash = flg; 1889 break; 1890 case MAT_SPD: 1891 A->spd_set = PETSC_TRUE; 1892 A->spd = flg; 1893 if (flg) { 1894 A->symmetric = PETSC_TRUE; 1895 A->structurally_symmetric = PETSC_TRUE; 1896 A->symmetric_set = PETSC_TRUE; 1897 A->structurally_symmetric_set = PETSC_TRUE; 1898 } 1899 break; 1900 case MAT_SYMMETRIC: 1901 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1902 break; 1903 case MAT_STRUCTURALLY_SYMMETRIC: 1904 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1905 break; 1906 case MAT_HERMITIAN: 1907 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1908 break; 1909 case MAT_SYMMETRY_ETERNAL: 1910 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1911 break; 1912 default: 1913 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1914 } 1915 PetscFunctionReturn(0); 1916 } 1917 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatGetRow_MPIAIJ" 1920 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1921 { 1922 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1923 PetscScalar 
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1924 PetscErrorCode ierr; 1925 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1926 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1927 PetscInt *cmap,*idx_p; 1928 1929 PetscFunctionBegin; 1930 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1931 mat->getrowactive = PETSC_TRUE; 1932 1933 if (!mat->rowvalues && (idx || v)) { 1934 /* 1935 allocate enough space to hold information from the longest row. 1936 */ 1937 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1938 PetscInt max = 1,tmp; 1939 for (i=0; i<matin->rmap->n; i++) { 1940 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1941 if (max < tmp) max = tmp; 1942 } 1943 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1944 } 1945 1946 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1947 lrow = row - rstart; 1948 1949 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1950 if (!v) {pvA = 0; pvB = 0;} 1951 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1952 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1953 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1954 nztot = nzA + nzB; 1955 1956 cmap = mat->garray; 1957 if (v || idx) { 1958 if (nztot) { 1959 /* Sort by increasing column numbers, assuming A and B already sorted */ 1960 PetscInt imark = -1; 1961 if (v) { 1962 *v = v_p = mat->rowvalues; 1963 for (i=0; i<nzB; i++) { 1964 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1965 else break; 1966 } 1967 imark = i; 1968 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1969 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1970 } 1971 if (idx) { 1972 *idx = idx_p = mat->rowindices; 1973 if (imark > -1) { 1974 for (i=0; i<imark; i++) { 1975 idx_p[i] = cmap[cworkB[i]]; 1976 } 1977 } else { 1978 for (i=0; i<nzB; i++) { 1979 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1980 else break; 1981 } 1982 imark = i; 1983 } 1984 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1985 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1986 } 1987 } else { 1988 if (idx) *idx = 0; 1989 if (v) *v = 0; 1990 } 1991 } 1992 *nz = nztot; 1993 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1994 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 2000 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2001 { 2002 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2003 2004 PetscFunctionBegin; 2005 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2006 aij->getrowactive = PETSC_FALSE; 2007 PetscFunctionReturn(0); 2008 } 2009 2010 #undef __FUNCT__ 2011 #define __FUNCT__ "MatNorm_MPIAIJ" 2012 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2013 { 2014 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2015 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2016 PetscErrorCode ierr; 2017 PetscInt i,j,cstart = mat->cmap->rstart; 2018 PetscReal sum = 0.0; 2019 MatScalar *v; 2020 2021 PetscFunctionBegin; 2022 if (aij->size == 1) { 2023 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2024 } else { 2025 if (type == NORM_FROBENIUS) { 2026 v = 
amat->a; 2027 for (i=0; i<amat->nz; i++) { 2028 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2029 } 2030 v = bmat->a; 2031 for (i=0; i<bmat->nz; i++) { 2032 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2033 } 2034 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2035 *norm = PetscSqrtReal(*norm); 2036 } else if (type == NORM_1) { /* max column norm */ 2037 PetscReal *tmp,*tmp2; 2038 PetscInt *jj,*garray = aij->garray; 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2040 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2041 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2042 *norm = 0.0; 2043 v = amat->a; jj = amat->j; 2044 for (j=0; j<amat->nz; j++) { 2045 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2046 } 2047 v = bmat->a; jj = bmat->j; 2048 for (j=0; j<bmat->nz; j++) { 2049 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2050 } 2051 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2052 for (j=0; j<mat->cmap->N; j++) { 2053 if (tmp2[j] > *norm) *norm = tmp2[j]; 2054 } 2055 ierr = PetscFree(tmp);CHKERRQ(ierr); 2056 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2057 } else if (type == NORM_INFINITY) { /* max row norm */ 2058 PetscReal ntemp = 0.0; 2059 for (j=0; j<aij->A->rmap->n; j++) { 2060 v = amat->a + amat->i[j]; 2061 sum = 0.0; 2062 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2063 sum += PetscAbsScalar(*v); v++; 2064 } 2065 v = bmat->a + bmat->i[j]; 2066 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2067 sum += PetscAbsScalar(*v); v++; 2068 } 2069 if (sum > ntemp) ntemp = sum; 2070 } 2071 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2072 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2073 } 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatTranspose_MPIAIJ" 2079 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2080 { 2081 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2082 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2083 PetscErrorCode ierr; 2084 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2085 PetscInt cstart = A->cmap->rstart,ncol; 2086 Mat B; 2087 MatScalar *array; 2088 2089 PetscFunctionBegin; 2090 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2091 2092 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2093 ai = Aloc->i; aj = Aloc->j; 2094 bi = Bloc->i; bj = Bloc->j; 2095 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2096 PetscInt *d_nnz,*g_nnz,*o_nnz; 2097 PetscSFNode *oloc; 2098 PETSC_UNUSED PetscSF sf; 2099 2100 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2101 /* compute d_nnz for preallocation */ 2102 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2103 for (i=0; i<ai[ma]; i++) { 2104 d_nnz[aj[i]]++; 2105 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2106 } 2107 /* compute local off-diagonal contributions */ 2108 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2109 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2110 /* map those to global */ 2111 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2112 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2113 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2114 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2115 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2117 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2118 2119 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2120 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2121 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2122 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2123 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2124 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2125 } else { 2126 B = *matout; 2127 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2128 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2129 } 2130 2131 /* copy over the A part */ 2132 array = Aloc->a; 2133 row = A->rmap->rstart; 2134 for (i=0; i<ma; i++) { 2135 ncol = ai[i+1]-ai[i]; 2136 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2137 row++; 2138 array += ncol; aj += ncol; 2139 } 2140 aj = Aloc->j; 2141 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2142 2143 /* copy over the B part */ 2144 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2145 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2146 array = Bloc->a; 2147 row = A->rmap->rstart; 2148 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2149 cols_tmp = cols; 2150 for (i=0; i<mb; i++) { 2151 ncol = bi[i+1]-bi[i]; 2152 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2153 row++; 2154 array += ncol; cols_tmp += ncol; 2155 } 2156 ierr = PetscFree(cols);CHKERRQ(ierr); 2157 2158 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2160 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2161 *matout = B; 2162 } else { 2163 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2164 } 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2170 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2171 { 2172 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2173 Mat a = aij->A,b = aij->B; 2174 PetscErrorCode ierr; 2175 PetscInt s1,s2,s3; 2176 2177 PetscFunctionBegin; 2178 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2179 if (rr) { 2180 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2181 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2182 /* Overlap communication with computation. 
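       The scatter of rr into aij->lvec is started here; the left scaling of the off-diagonal
       block and the scaling of the diagonal block proceed while those messages are in flight,
       and the scatter is completed only when the off-diagonal block needs the remote entries
       of rr for its right scaling.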
*/ 2183 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2184 } 2185 if (ll) { 2186 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2187 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2188 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2189 } 2190 /* scale the diagonal block */ 2191 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2192 2193 if (rr) { 2194 /* Do a scatter end and then right scale the off-diagonal block */ 2195 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2196 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2197 } 2198 PetscFunctionReturn(0); 2199 } 2200 2201 #undef __FUNCT__ 2202 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2203 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2204 { 2205 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2206 PetscErrorCode ierr; 2207 2208 PetscFunctionBegin; 2209 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2210 PetscFunctionReturn(0); 2211 } 2212 2213 #undef __FUNCT__ 2214 #define __FUNCT__ "MatEqual_MPIAIJ" 2215 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2216 { 2217 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2218 Mat a,b,c,d; 2219 PetscBool flg; 2220 PetscErrorCode ierr; 2221 2222 PetscFunctionBegin; 2223 a = matA->A; b = matA->B; 2224 c = matB->A; d = matB->B; 2225 2226 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2227 if (flg) { 2228 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2229 } 2230 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2231 PetscFunctionReturn(0); 2232 } 2233 2234 #undef __FUNCT__ 2235 #define __FUNCT__ "MatCopy_MPIAIJ" 2236 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2237 { 2238 PetscErrorCode ierr; 2239 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2240 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2241 2242 PetscFunctionBegin; 2243 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2244 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2245 /* because of the column compression in the off-processor part of the matrix a->B, 2246 the number of columns in a->B and b->B may be different, hence we cannot call 2247 the MatCopy() directly on the two parts. 
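       (MatCopy_Basic() below essentially loops MatGetRow()/MatSetValues() over the whole
       parallel matrix, so it does not depend on how the off-diagonal blocks are compressed.)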
If need be, we can provide a more 2248 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2249 then copying the submatrices */ 2250 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2251 } else { 2252 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2253 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2254 } 2255 PetscFunctionReturn(0); 2256 } 2257 2258 #undef __FUNCT__ 2259 #define __FUNCT__ "MatSetUp_MPIAIJ" 2260 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2261 { 2262 PetscErrorCode ierr; 2263 2264 PetscFunctionBegin; 2265 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2266 PetscFunctionReturn(0); 2267 } 2268 2269 #undef __FUNCT__ 2270 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2271 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2272 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2273 { 2274 PetscInt i,m=Y->rmap->N; 2275 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2276 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2277 const PetscInt *xi = x->i,*yi = y->i; 2278 2279 PetscFunctionBegin; 2280 /* Set the number of nonzeros in the new matrix */ 2281 for (i=0; i<m; i++) { 2282 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2283 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2284 nnz[i] = 0; 2285 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2286 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2287 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2288 nnz[i]++; 2289 } 2290 for (; k<nzy; k++) nnz[i]++; 2291 } 2292 PetscFunctionReturn(0); 2293 } 2294 2295 #undef __FUNCT__ 2296 #define __FUNCT__ "MatAXPY_MPIAIJ" 2297 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2298 { 2299 PetscErrorCode ierr; 2300 PetscInt i; 2301 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2302 PetscBLASInt bnz,one=1; 2303 Mat_SeqAIJ *x,*y; 2304 2305 PetscFunctionBegin; 2306 if (str == SAME_NONZERO_PATTERN) { 2307 PetscScalar alpha = a; 2308 x = (Mat_SeqAIJ*)xx->A->data; 2309 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2310 y = (Mat_SeqAIJ*)yy->A->data; 2311 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2312 x = (Mat_SeqAIJ*)xx->B->data; 2313 y = (Mat_SeqAIJ*)yy->B->data; 2314 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2315 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2316 } else if (str == SUBSET_NONZERO_PATTERN) { 2317 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2318 2319 x = (Mat_SeqAIJ*)xx->B->data; 2320 y = (Mat_SeqAIJ*)yy->B->data; 2321 if (y->xtoy && y->XtoY != xx->B) { 2322 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2323 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2324 } 2325 if (!y->xtoy) { /* get xtoy */ 2326 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2327 y->XtoY = xx->B; 2328 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2329 } 2330 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2331 } else { 2332 Mat B; 2333 PetscInt *nnz_d,*nnz_o; 2334 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2335 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2336 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2337 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2338 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2339 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2340 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2341 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2342 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2343 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2344 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2345 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2347 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2348 } 2349 PetscFunctionReturn(0); 2350 } 2351 2352 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2353 2354 #undef __FUNCT__ 2355 #define __FUNCT__ "MatConjugate_MPIAIJ" 2356 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2357 { 2358 #if defined(PETSC_USE_COMPLEX) 2359 PetscErrorCode ierr; 2360 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2361 2362 PetscFunctionBegin; 2363 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2364 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2365 #else 2366 PetscFunctionBegin; 2367 #endif 2368 PetscFunctionReturn(0); 2369 } 2370 2371 #undef __FUNCT__ 2372 #define __FUNCT__ "MatRealPart_MPIAIJ" 2373 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2374 { 2375 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2376 PetscErrorCode ierr; 2377 2378 PetscFunctionBegin; 2379 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2380 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2381 PetscFunctionReturn(0); 2382 } 2383 2384 #undef __FUNCT__ 2385 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2386 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2387 { 2388 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2393 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2394 PetscFunctionReturn(0); 2395 } 2396 2397 #if defined(PETSC_HAVE_PBGL) 2398 2399 #include <boost/parallel/mpi/bsp_process_group.hpp> 2400 #include <boost/graph/distributed/ilu_default_graph.hpp> 2401 #include <boost/graph/distributed/ilu_0_block.hpp> 2402 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2403 #include <boost/graph/distributed/petsc/interface.hpp> 2404 #include <boost/multi_array.hpp> 2405 #include <boost/parallel/distributed_property_map->hpp> 2406 2407 #undef __FUNCT__ 2408 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2409 /* 2410 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2411 */ 2412 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2413 { 2414 namespace petsc = boost::distributed::petsc; 2415 2416 namespace graph_dist = boost::graph::distributed; 2417 using boost::graph::distributed::ilu_default::process_group_type; 2418 using boost::graph::ilu_permuted; 2419 2420 PetscBool row_identity, col_identity; 2421 PetscContainer c; 2422 PetscInt m, n, M, N; 2423 PetscErrorCode ierr; 2424 2425 PetscFunctionBegin; 2426 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2427 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2428 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2429 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2430 2431 process_group_type pg; 2432 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2433 lgraph_type 
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2434 lgraph_type& level_graph = *lgraph_p; 2435 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2436 2437 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2438 ilu_permuted(level_graph); 2439 2440 /* put together the new matrix */ 2441 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2442 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2443 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2444 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2445 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2446 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2447 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2449 2450 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2451 ierr = PetscContainerSetPointer(c, lgraph_p); 2452 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2453 ierr = PetscContainerDestroy(&c); 2454 PetscFunctionReturn(0); 2455 } 2456 2457 #undef __FUNCT__ 2458 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2459 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2460 { 2461 PetscFunctionBegin; 2462 PetscFunctionReturn(0); 2463 } 2464 2465 #undef __FUNCT__ 2466 #define __FUNCT__ "MatSolve_MPIAIJ" 2467 /* 2468 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2469 */ 2470 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2471 { 2472 namespace graph_dist = boost::graph::distributed; 2473 2474 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2475 lgraph_type *lgraph_p; 2476 PetscContainer c; 2477 PetscErrorCode ierr; 2478 2479 PetscFunctionBegin; 2480 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2481 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2482 ierr = VecCopy(b, x);CHKERRQ(ierr); 2483 2484 PetscScalar *array_x; 2485 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2486 PetscInt sx; 2487 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2488 2489 PetscScalar *array_b; 2490 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2491 PetscInt sb; 2492 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2493 2494 lgraph_type& level_graph = *lgraph_p; 2495 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2496 2497 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2498 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2499 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2500 2501 typedef boost::iterator_property_map<array_ref_type::iterator, 2502 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2503 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2504 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2505 2506 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2507 PetscFunctionReturn(0); 2508 } 2509 #endif 2510 2511 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2512 PetscInt nzlocal,nsends,nrecvs; 2513 PetscMPIInt *send_rank,*recv_rank; 2514 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2515 PetscScalar *sbuf_a,**rbuf_a; 2516 PetscErrorCode (*Destroy)(Mat); 2517 } Mat_Redundant; 2518 2519 #undef __FUNCT__ 2520 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2521 
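/*
   The Mat_Redundant buffers above are attached to the redundant matrix C through a
   PetscContainer at the end of MatGetRedundantMatrix_MPIAIJ():

      ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
      ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
      ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
      ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);

   so that the communication buffers survive between MAT_REUSE_MATRIX calls and are freed by
   this user-destroy callback once C itself is destroyed (see MatDestroy_MatRedundant below).
*/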
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2522 { 2523 PetscErrorCode ierr; 2524 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2525 PetscInt i; 2526 2527 PetscFunctionBegin; 2528 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2529 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2530 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2531 for (i=0; i<redund->nrecvs; i++) { 2532 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2533 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2534 } 2535 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2536 ierr = PetscFree(redund);CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "MatDestroy_MatRedundant" 2542 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2543 { 2544 PetscErrorCode ierr; 2545 PetscContainer container; 2546 Mat_Redundant *redund=NULL; 2547 2548 PetscFunctionBegin; 2549 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2550 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2551 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2552 2553 A->ops->destroy = redund->Destroy; 2554 2555 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2556 if (A->ops->destroy) { 2557 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2558 } 2559 PetscFunctionReturn(0); 2560 } 2561 2562 #undef __FUNCT__ 2563 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2564 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2565 { 2566 PetscMPIInt rank,size; 2567 MPI_Comm comm; 2568 PetscErrorCode ierr; 2569 PetscInt nsends = 0,nrecvs=0,i,rownz_max=0; 2570 PetscMPIInt *send_rank= NULL,*recv_rank=NULL; 2571 PetscInt *rowrange = mat->rmap->range; 2572 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2573 Mat A = aij->A,B=aij->B,C=*matredundant; 2574 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2575 PetscScalar *sbuf_a; 2576 PetscInt nzlocal=a->nz+b->nz; 2577 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2578 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2579 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2580 MatScalar *aworkA,*aworkB; 2581 PetscScalar *vals; 2582 PetscMPIInt tag1,tag2,tag3,imdex; 2583 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2584 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2585 MPI_Status recv_status,*send_status; 2586 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2587 PetscInt **rbuf_j=NULL; 2588 PetscScalar **rbuf_a=NULL; 2589 Mat_Redundant *redund =NULL; 2590 PetscContainer container; 2591 2592 PetscFunctionBegin; 2593 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2594 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2595 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2596 2597 if (reuse == MAT_REUSE_MATRIX) { 2598 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2599 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2600 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2601 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2602 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2603 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2604 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2605 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2606 2607 nsends = redund->nsends; 2608 nrecvs = redund->nrecvs; 2609 send_rank = redund->send_rank; 2610 recv_rank = redund->recv_rank; 2611 sbuf_nz = redund->sbuf_nz; 2612 rbuf_nz = redund->rbuf_nz; 2613 sbuf_j = redund->sbuf_j; 2614 sbuf_a = redund->sbuf_a; 2615 rbuf_j = redund->rbuf_j; 2616 rbuf_a = redund->rbuf_a; 2617 } 2618 2619 if (reuse == MAT_INITIAL_MATRIX) { 2620 PetscMPIInt subrank,subsize; 2621 PetscInt nleftover,np_subcomm; 2622 /* get the destination processors' id send_rank, nsends and nrecvs */ 2623 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2624 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2625 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2626 2627 np_subcomm = size/nsubcomm; 2628 nleftover = size - nsubcomm*np_subcomm; 2629 2630 nsends = 0; nrecvs = 0; 2631 for (i=0; i<size; i++) { /* i=rank*/ 2632 if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */ 2633 send_rank[nsends] = i; nsends++; 2634 recv_rank[nrecvs++] = i; 2635 } 2636 } 2637 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2638 i = size-nleftover-1; 2639 j = 0; 2640 while (j < nsubcomm - nleftover) { 2641 send_rank[nsends++] = i; 2642 i--; j++; 2643 } 2644 } 2645 2646 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2647 for (i=0; i<nleftover; i++) { 2648 recv_rank[nrecvs++] = size-nleftover+i; 2649 } 2650 } 2651 2652 /* allocate sbuf_j, sbuf_a */ 2653 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2654 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2655 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2656 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2657 2658 /* copy mat's local entries into the buffers */ 2659 if (reuse == MAT_INITIAL_MATRIX) { 2660 rownz_max = 0; 2661 rptr = sbuf_j; 2662 cols = sbuf_j + rend-rstart + 1; 2663 vals = sbuf_a; 2664 rptr[0] = 0; 2665 for (i=0; i<rend-rstart; i++) { 2666 row = i + rstart; 2667 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2668 ncols = nzA + nzB; 2669 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2670 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2671 /* load the column indices for this row into cols */ 2672 lwrite = 0; 2673 for (l=0; l<nzB; l++) { 2674 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2675 vals[lwrite] = aworkB[l]; 2676 cols[lwrite++] = ctmp; 2677 } 2678 } 2679 for (l=0; l<nzA; l++) { 2680 vals[lwrite] = aworkA[l]; 2681 cols[lwrite++] = cstart + cworkA[l]; 2682 } 2683 for (l=0; l<nzB; l++) { 2684 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2685 vals[lwrite] = aworkB[l]; 2686 cols[lwrite++] = ctmp; 2687 } 2688 } 2689 vals += ncols; 2690 cols += ncols; 2691 rptr[i+1] = rptr[i] + ncols; 2692 if (rownz_max < ncols) rownz_max = ncols; 2693 } 2694 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2695 } else { /* only copy matrix values into sbuf_a */ 2696 rptr = sbuf_j; 2697 vals = sbuf_a; 2698 rptr[0] = 0; 2699 for (i=0; 
i<rend-rstart; i++) { 2700 row = i + rstart; 2701 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2702 ncols = nzA + nzB; 2703 cworkB = b->j + b->i[i]; 2704 aworkA = a->a + a->i[i]; 2705 aworkB = b->a + b->i[i]; 2706 lwrite = 0; 2707 for (l=0; l<nzB; l++) { 2708 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2709 } 2710 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2711 for (l=0; l<nzB; l++) { 2712 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2713 } 2714 vals += ncols; 2715 rptr[i+1] = rptr[i] + ncols; 2716 } 2717 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2718 2719 /* send nzlocal to others, and recv other's nzlocal */ 2720 /*--------------------------------------------------*/ 2721 if (reuse == MAT_INITIAL_MATRIX) { 2722 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2723 2724 s_waits2 = s_waits3 + nsends; 2725 s_waits1 = s_waits2 + nsends; 2726 r_waits1 = s_waits1 + nsends; 2727 r_waits2 = r_waits1 + nrecvs; 2728 r_waits3 = r_waits2 + nrecvs; 2729 } else { 2730 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2731 2732 r_waits3 = s_waits3 + nsends; 2733 } 2734 2735 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2736 if (reuse == MAT_INITIAL_MATRIX) { 2737 /* get new tags to keep the communication clean */ 2738 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2739 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2740 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2741 2742 /* post receives of other's nzlocal */ 2743 for (i=0; i<nrecvs; i++) { 2744 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2745 } 2746 /* send nzlocal to others */ 2747 for (i=0; i<nsends; i++) { 2748 sbuf_nz[i] = nzlocal; 2749 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2750 } 2751 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2752 count = nrecvs; 2753 while (count) { 2754 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2755 2756 recv_rank[imdex] = recv_status.MPI_SOURCE; 2757 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2758 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2759 2760 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2761 2762 rbuf_nz[imdex] += i + 2; 2763 2764 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2765 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2766 count--; 2767 } 2768 /* wait on sends of nzlocal */ 2769 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2770 /* send mat->i,j to others, and recv from other's */ 2771 /*------------------------------------------------*/ 2772 for (i=0; i<nsends; i++) { 2773 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2774 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2775 } 2776 /* wait on receives of mat->i,j */ 2777 /*------------------------------*/ 2778 count = nrecvs; 2779 while (count) { 2780 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2781 if (recv_rank[imdex] != 
recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2782 count--; 2783 } 2784 /* wait on sends of mat->i,j */ 2785 /*---------------------------*/ 2786 if (nsends) { 2787 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2788 } 2789 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2790 2791 /* post receives, send and receive mat->a */ 2792 /*----------------------------------------*/ 2793 for (imdex=0; imdex<nrecvs; imdex++) { 2794 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2795 } 2796 for (i=0; i<nsends; i++) { 2797 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2798 } 2799 count = nrecvs; 2800 while (count) { 2801 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2802 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2803 count--; 2804 } 2805 if (nsends) { 2806 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2807 } 2808 2809 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2810 2811 /* create redundant matrix */ 2812 /*-------------------------*/ 2813 if (reuse == MAT_INITIAL_MATRIX) { 2814 /* compute rownz_max for preallocation */ 2815 for (imdex=0; imdex<nrecvs; imdex++) { 2816 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2817 rptr = rbuf_j[imdex]; 2818 for (i=0; i<j; i++) { 2819 ncols = rptr[i+1] - rptr[i]; 2820 if (rownz_max < ncols) rownz_max = ncols; 2821 } 2822 } 2823 2824 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2825 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2826 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2827 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2828 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2829 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2830 } else { 2831 C = *matredundant; 2832 } 2833 2834 /* insert local matrix entries */ 2835 rptr = sbuf_j; 2836 cols = sbuf_j + rend-rstart + 1; 2837 vals = sbuf_a; 2838 for (i=0; i<rend-rstart; i++) { 2839 row = i + rstart; 2840 ncols = rptr[i+1] - rptr[i]; 2841 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2842 vals += ncols; 2843 cols += ncols; 2844 } 2845 /* insert received matrix entries */ 2846 for (imdex=0; imdex<nrecvs; imdex++) { 2847 rstart = rowrange[recv_rank[imdex]]; 2848 rend = rowrange[recv_rank[imdex]+1]; 2849 rptr = rbuf_j[imdex]; 2850 cols = rbuf_j[imdex] + rend-rstart + 1; 2851 vals = rbuf_a[imdex]; 2852 for (i=0; i<rend-rstart; i++) { 2853 row = i + rstart; 2854 ncols = rptr[i+1] - rptr[i]; 2855 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2856 vals += ncols; 2857 cols += ncols; 2858 } 2859 } 2860 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2861 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2862 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2863 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2864 if (reuse == MAT_INITIAL_MATRIX) { 2865 PetscContainer container; 2866 *matredundant = C; 2867 /* create a supporting struct and attach it to C for reuse */ 2868 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2869 ierr = 
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2870 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2871 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2872 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2873 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2874 2875 redund->nzlocal = nzlocal; 2876 redund->nsends = nsends; 2877 redund->nrecvs = nrecvs; 2878 redund->send_rank = send_rank; 2879 redund->recv_rank = recv_rank; 2880 redund->sbuf_nz = sbuf_nz; 2881 redund->rbuf_nz = rbuf_nz; 2882 redund->sbuf_j = sbuf_j; 2883 redund->sbuf_a = sbuf_a; 2884 redund->rbuf_j = rbuf_j; 2885 redund->rbuf_a = rbuf_a; 2886 2887 redund->Destroy = C->ops->destroy; 2888 C->ops->destroy = MatDestroy_MatRedundant; 2889 } 2890 PetscFunctionReturn(0); 2891 } 2892 2893 #undef __FUNCT__ 2894 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2895 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2896 { 2897 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2898 PetscErrorCode ierr; 2899 PetscInt i,*idxb = 0; 2900 PetscScalar *va,*vb; 2901 Vec vtmp; 2902 2903 PetscFunctionBegin; 2904 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2905 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2906 if (idx) { 2907 for (i=0; i<A->rmap->n; i++) { 2908 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2909 } 2910 } 2911 2912 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2913 if (idx) { 2914 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2915 } 2916 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2917 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2918 2919 for (i=0; i<A->rmap->n; i++) { 2920 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2921 va[i] = vb[i]; 2922 if (idx) idx[i] = a->garray[idxb[i]]; 2923 } 2924 } 2925 2926 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2927 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2928 ierr = PetscFree(idxb);CHKERRQ(ierr); 2929 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2930 PetscFunctionReturn(0); 2931 } 2932 2933 #undef __FUNCT__ 2934 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2935 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2936 { 2937 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2938 PetscErrorCode ierr; 2939 PetscInt i,*idxb = 0; 2940 PetscScalar *va,*vb; 2941 Vec vtmp; 2942 2943 PetscFunctionBegin; 2944 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2945 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2946 if (idx) { 2947 for (i=0; i<A->cmap->n; i++) { 2948 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2949 } 2950 } 2951 2952 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2953 if (idx) { 2954 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2955 } 2956 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2957 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2958 2959 for (i=0; i<A->rmap->n; i++) { 2960 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2961 va[i] = vb[i]; 2962 if (idx) idx[i] = a->garray[idxb[i]]; 2963 } 2964 } 2965 2966 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2967 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2968 ierr = PetscFree(idxb);CHKERRQ(ierr); 2969 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2970 PetscFunctionReturn(0); 2971 } 2972 2973 #undef __FUNCT__ 2974 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2975 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2976 { 2977 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) 
A->data; 2978 PetscInt n = A->rmap->n; 2979 PetscInt cstart = A->cmap->rstart; 2980 PetscInt *cmap = mat->garray; 2981 PetscInt *diagIdx, *offdiagIdx; 2982 Vec diagV, offdiagV; 2983 PetscScalar *a, *diagA, *offdiagA; 2984 PetscInt r; 2985 PetscErrorCode ierr; 2986 2987 PetscFunctionBegin; 2988 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2989 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2990 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2991 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2992 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2993 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2994 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2995 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2996 for (r = 0; r < n; ++r) { 2997 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2998 a[r] = diagA[r]; 2999 idx[r] = cstart + diagIdx[r]; 3000 } else { 3001 a[r] = offdiagA[r]; 3002 idx[r] = cmap[offdiagIdx[r]]; 3003 } 3004 } 3005 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3006 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3007 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3008 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3009 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3010 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3011 PetscFunctionReturn(0); 3012 } 3013 3014 #undef __FUNCT__ 3015 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3016 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3017 { 3018 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3019 PetscInt n = A->rmap->n; 3020 PetscInt cstart = A->cmap->rstart; 3021 PetscInt *cmap = mat->garray; 3022 PetscInt *diagIdx, *offdiagIdx; 3023 Vec diagV, offdiagV; 3024 PetscScalar *a, *diagA, *offdiagA; 3025 PetscInt r; 3026 PetscErrorCode ierr; 3027 3028 PetscFunctionBegin; 3029 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3030 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3031 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3032 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3033 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3034 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3035 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3036 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3037 for (r = 0; r < n; ++r) { 3038 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3039 a[r] = diagA[r]; 3040 idx[r] = cstart + diagIdx[r]; 3041 } else { 3042 a[r] = offdiagA[r]; 3043 idx[r] = cmap[offdiagIdx[r]]; 3044 } 3045 } 3046 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3047 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3048 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3049 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3050 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3051 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3052 PetscFunctionReturn(0); 3053 } 3054 3055 #undef __FUNCT__ 3056 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3057 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3058 { 3059 PetscErrorCode ierr; 3060 Mat *dummy; 3061 3062 PetscFunctionBegin; 3063 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3064 *newmat = *dummy; 3065 ierr = PetscFree(dummy);CHKERRQ(ierr); 3066 PetscFunctionReturn(0); 3067 } 3068 3069 extern PetscErrorCode
MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3070 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3073 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3074 { 3075 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3076 PetscErrorCode ierr; 3077 3078 PetscFunctionBegin; 3079 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3080 PetscFunctionReturn(0); 3081 } 3082 3083 #undef __FUNCT__ 3084 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3085 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3086 { 3087 PetscErrorCode ierr; 3088 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3089 3090 PetscFunctionBegin; 3091 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3092 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3093 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3094 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3095 PetscFunctionReturn(0); 3096 } 3097 3098 /* -------------------------------------------------------------------*/ 3099 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3100 MatGetRow_MPIAIJ, 3101 MatRestoreRow_MPIAIJ, 3102 MatMult_MPIAIJ, 3103 /* 4*/ MatMultAdd_MPIAIJ, 3104 MatMultTranspose_MPIAIJ, 3105 MatMultTransposeAdd_MPIAIJ, 3106 #if defined(PETSC_HAVE_PBGL) 3107 MatSolve_MPIAIJ, 3108 #else 3109 0, 3110 #endif 3111 0, 3112 0, 3113 /*10*/ 0, 3114 0, 3115 0, 3116 MatSOR_MPIAIJ, 3117 MatTranspose_MPIAIJ, 3118 /*15*/ MatGetInfo_MPIAIJ, 3119 MatEqual_MPIAIJ, 3120 MatGetDiagonal_MPIAIJ, 3121 MatDiagonalScale_MPIAIJ, 3122 MatNorm_MPIAIJ, 3123 /*20*/ MatAssemblyBegin_MPIAIJ, 3124 MatAssemblyEnd_MPIAIJ, 3125 MatSetOption_MPIAIJ, 3126 MatZeroEntries_MPIAIJ, 3127 /*24*/ MatZeroRows_MPIAIJ, 3128 0, 3129 #if defined(PETSC_HAVE_PBGL) 3130 0, 3131 #else 3132 0, 3133 #endif 3134 0, 3135 0, 3136 /*29*/ MatSetUp_MPIAIJ, 3137 #if defined(PETSC_HAVE_PBGL) 3138 0, 3139 #else 3140 0, 3141 #endif 3142 0, 3143 0, 3144 0, 3145 /*34*/ MatDuplicate_MPIAIJ, 3146 0, 3147 0, 3148 0, 3149 0, 3150 /*39*/ MatAXPY_MPIAIJ, 3151 MatGetSubMatrices_MPIAIJ, 3152 MatIncreaseOverlap_MPIAIJ, 3153 MatGetValues_MPIAIJ, 3154 MatCopy_MPIAIJ, 3155 /*44*/ MatGetRowMax_MPIAIJ, 3156 MatScale_MPIAIJ, 3157 0, 3158 0, 3159 MatZeroRowsColumns_MPIAIJ, 3160 /*49*/ MatSetRandom_MPIAIJ, 3161 0, 3162 0, 3163 0, 3164 0, 3165 /*54*/ MatFDColoringCreate_MPIAIJ, 3166 0, 3167 MatSetUnfactored_MPIAIJ, 3168 MatPermute_MPIAIJ, 3169 0, 3170 /*59*/ MatGetSubMatrix_MPIAIJ, 3171 MatDestroy_MPIAIJ, 3172 MatView_MPIAIJ, 3173 0, 3174 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3175 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3176 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3177 0, 3178 0, 3179 0, 3180 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3181 MatGetRowMinAbs_MPIAIJ, 3182 0, 3183 MatSetColoring_MPIAIJ, 3184 0, 3185 MatSetValuesAdifor_MPIAIJ, 3186 /*75*/ MatFDColoringApply_AIJ, 3187 0, 3188 0, 3189 0, 3190 MatFindZeroDiagonals_MPIAIJ, 3191 /*80*/ 0, 3192 0, 3193 0, 3194 /*83*/ MatLoad_MPIAIJ, 3195 0, 3196 0, 3197 0, 3198 0, 3199 0, 3200 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3201 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3202 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3203 MatPtAP_MPIAIJ_MPIAIJ, 3204 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3205 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3206 0, 3207 0, 3208 0, 3209 0, 3210 /*99*/ 0, 3211 0, 3212 0, 3213 MatConjugate_MPIAIJ, 3214 0, 3215 /*104*/MatSetValuesRow_MPIAIJ, 3216 MatRealPart_MPIAIJ, 3217 MatImaginaryPart_MPIAIJ, 3218 0, 3219 0, 3220 /*109*/0, 3221 MatGetRedundantMatrix_MPIAIJ, 3222 MatGetRowMin_MPIAIJ, 3223 0, 3224 0, 
3225 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3226 0, 3227 0, 3228 0, 3229 0, 3230 /*119*/0, 3231 0, 3232 0, 3233 0, 3234 MatGetMultiProcBlock_MPIAIJ, 3235 /*124*/MatFindNonzeroRows_MPIAIJ, 3236 MatGetColumnNorms_MPIAIJ, 3237 MatInvertBlockDiagonal_MPIAIJ, 3238 0, 3239 MatGetSubMatricesParallel_MPIAIJ, 3240 /*129*/0, 3241 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3242 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3243 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3244 0, 3245 /*134*/0, 3246 0, 3247 0, 3248 0, 3249 0, 3250 /*139*/0, 3251 0 3252 }; 3253 3254 /* ----------------------------------------------------------------------------------------*/ 3255 3256 #undef __FUNCT__ 3257 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3258 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3259 { 3260 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3261 PetscErrorCode ierr; 3262 3263 PetscFunctionBegin; 3264 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3265 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3266 PetscFunctionReturn(0); 3267 } 3268 3269 #undef __FUNCT__ 3270 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3271 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3272 { 3273 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3274 PetscErrorCode ierr; 3275 3276 PetscFunctionBegin; 3277 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3278 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3279 PetscFunctionReturn(0); 3280 } 3281 3282 #undef __FUNCT__ 3283 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3284 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3285 { 3286 Mat_MPIAIJ *b; 3287 PetscErrorCode ierr; 3288 PetscInt i; 3289 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3290 3291 PetscFunctionBegin; 3292 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3293 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3294 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3295 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3296 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3297 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3298 3299 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3300 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3301 if (d_nnz) { 3302 for (i=0; i<B->rmap->n; i++) { 3303 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3304 } 3305 } 3306 if (o_nnz) { 3307 for (i=0; i<B->rmap->n; i++) { 3308 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3309 } 3310 } 3311 b = (Mat_MPIAIJ*)B->data; 3312 3313 if (!B->preallocated) { 3314 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3315 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3316 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3317 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3318 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3319 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3320 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3321 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3322 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3323 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3324 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3325 } 3326 3327 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3328 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3329 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3330 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3331 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3332 B->preallocated = PETSC_TRUE; 3333 PetscFunctionReturn(0); 3334 } 3335 3336 #undef __FUNCT__ 3337 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3338 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3339 { 3340 Mat mat; 3341 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3342 PetscErrorCode ierr; 3343 3344 PetscFunctionBegin; 3345 *newmat = 0; 3346 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3347 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3348 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3349 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3350 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3351 a = (Mat_MPIAIJ*)mat->data; 3352 3353 mat->factortype = matin->factortype; 3354 mat->rmap->bs = matin->rmap->bs; 3355 mat->cmap->bs = matin->cmap->bs; 3356 mat->assembled = PETSC_TRUE; 3357 mat->insertmode = NOT_SET_VALUES; 3358 mat->preallocated = PETSC_TRUE; 3359 3360 a->size = oldmat->size; 3361 a->rank = oldmat->rank; 3362 a->donotstash = oldmat->donotstash; 3363 a->roworiented = oldmat->roworiented; 3364 a->rowindices = 0; 3365 a->rowvalues = 0; 3366 a->getrowactive = PETSC_FALSE; 3367 3368 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3369 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3370 3371 if (oldmat->colmap) { 3372 #if defined(PETSC_USE_CTABLE) 3373 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3374 #else 3375 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3376 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3377 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3378 #endif 3379 } else a->colmap = 0; 3380 if (oldmat->garray) { 3381 PetscInt len; 3382 len = oldmat->B->cmap->n; 3383 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3384 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3385 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3386 } else a->garray = 0; 3387 3388 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3389 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3390 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3391 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3392 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3393 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3394 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3395 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3396 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3397 *newmat = mat; 3398 PetscFunctionReturn(0); 3399 } 3400 3401 3402 3403 #undef __FUNCT__ 3404 #define __FUNCT__ "MatLoad_MPIAIJ" 3405 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3406 { 3407 PetscScalar *vals,*svals; 3408 MPI_Comm comm; 3409 PetscErrorCode ierr; 3410 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3411 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3412 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3413 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3414 PetscInt cend,cstart,n,*rowners,sizesset=1; 3415 int fd; 3416 PetscInt bs = 1; 3417 3418 PetscFunctionBegin; 3419 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3420 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3421 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3422 if (!rank) { 3423 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3424 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3425 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3426 } 3427 3428 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3429 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3430 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3431 3432 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3433 3434 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3435 M = header[1]; N = header[2]; 3436 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3437 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3438 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3439 3440 /* If global sizes are set, check if they are consistent with that given in the file */ 3441 if (sizesset) { 3442 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3443 } 3444 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3445 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3446 3447 /* determine ownership of all (block) rows */ 3448 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3449 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3450 else m = newMat->rmap->n; /* Set by user */ 3451 3452 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3453 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3454 3455 /* First process needs enough room for process with most rows */ 3456 if (!rank) { 3457 mmax = rowners[1]; 3458 for (i=2; 
i<=size; i++) { 3459 mmax = PetscMax(mmax, rowners[i]); 3460 } 3461 } else mmax = -1; /* unused, but compilers complain */ 3462 3463 rowners[0] = 0; 3464 for (i=2; i<=size; i++) { 3465 rowners[i] += rowners[i-1]; 3466 } 3467 rstart = rowners[rank]; 3468 rend = rowners[rank+1]; 3469 3470 /* distribute row lengths to all processors */ 3471 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3472 if (!rank) { 3473 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3474 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3475 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3476 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3477 for (j=0; j<m; j++) { 3478 procsnz[0] += ourlens[j]; 3479 } 3480 for (i=1; i<size; i++) { 3481 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3482 /* calculate the number of nonzeros on each processor */ 3483 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3484 procsnz[i] += rowlengths[j]; 3485 } 3486 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3487 } 3488 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3489 } else { 3490 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3491 } 3492 3493 if (!rank) { 3494 /* determine max buffer needed and allocate it */ 3495 maxnz = 0; 3496 for (i=0; i<size; i++) { 3497 maxnz = PetscMax(maxnz,procsnz[i]); 3498 } 3499 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3500 3501 /* read in my part of the matrix column indices */ 3502 nz = procsnz[0]; 3503 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3504 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3505 3506 /* read in every one elses and ship off */ 3507 for (i=1; i<size; i++) { 3508 nz = procsnz[i]; 3509 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3510 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3511 } 3512 ierr = PetscFree(cols);CHKERRQ(ierr); 3513 } else { 3514 /* determine buffer space needed for message */ 3515 nz = 0; 3516 for (i=0; i<m; i++) { 3517 nz += ourlens[i]; 3518 } 3519 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3520 3521 /* receive message of column indices*/ 3522 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3523 } 3524 3525 /* determine column ownership if matrix is not square */ 3526 if (N != M) { 3527 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3528 else n = newMat->cmap->n; 3529 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3530 cstart = cend - n; 3531 } else { 3532 cstart = rstart; 3533 cend = rend; 3534 n = cend - cstart; 3535 } 3536 3537 /* loop over local rows, determining number of off diagonal entries */ 3538 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3539 jj = 0; 3540 for (i=0; i<m; i++) { 3541 for (j=0; j<ourlens[i]; j++) { 3542 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3543 jj++; 3544 } 3545 } 3546 3547 for (i=0; i<m; i++) { 3548 ourlens[i] -= offlens[i]; 3549 } 3550 if (!sizesset) { 3551 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3552 } 3553 3554 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3555 3556 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3557 3558 for (i=0; i<m; i++) { 3559 ourlens[i] += offlens[i]; 3560 } 3561 3562 if (!rank) { 3563 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3564 3565 /* 
read in my part of the matrix numerical values */ 3566 nz = procsnz[0]; 3567 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3568 3569 /* insert into matrix */ 3570 jj = rstart; 3571 smycols = mycols; 3572 svals = vals; 3573 for (i=0; i<m; i++) { 3574 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3575 smycols += ourlens[i]; 3576 svals += ourlens[i]; 3577 jj++; 3578 } 3579 3580 /* read in other processors and ship out */ 3581 for (i=1; i<size; i++) { 3582 nz = procsnz[i]; 3583 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3584 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3585 } 3586 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3587 } else { 3588 /* receive numeric values */ 3589 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3590 3591 /* receive message of values*/ 3592 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3593 3594 /* insert into matrix */ 3595 jj = rstart; 3596 smycols = mycols; 3597 svals = vals; 3598 for (i=0; i<m; i++) { 3599 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3600 smycols += ourlens[i]; 3601 svals += ourlens[i]; 3602 jj++; 3603 } 3604 } 3605 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3606 ierr = PetscFree(vals);CHKERRQ(ierr); 3607 ierr = PetscFree(mycols);CHKERRQ(ierr); 3608 ierr = PetscFree(rowners);CHKERRQ(ierr); 3609 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3610 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3611 PetscFunctionReturn(0); 3612 } 3613 3614 #undef __FUNCT__ 3615 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3616 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3617 { 3618 PetscErrorCode ierr; 3619 IS iscol_local; 3620 PetscInt csize; 3621 3622 PetscFunctionBegin; 3623 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3624 if (call == MAT_REUSE_MATRIX) { 3625 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3626 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3627 } else { 3628 PetscInt cbs; 3629 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3630 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3631 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3632 } 3633 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3634 if (call == MAT_INITIAL_MATRIX) { 3635 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3636 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3637 } 3638 PetscFunctionReturn(0); 3639 } 3640 3641 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3642 #undef __FUNCT__ 3643 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3644 /* 3645 Not great since it makes two copies of the submatrix, first an SeqAIJ 3646 in local and then by concatenating the local matrices the end result. 3647 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3648 3649 Note: This requires a sequential iscol with all indices. 
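      A caller is therefore expected to gather the requested column indices onto every process
      before calling this routine. A rough sketch of that pattern, mirroring what
      MatGetSubMatrix_MPIAIJ() above already does (the variable names here are illustrative only):

         IS iscol_local;
         ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
         ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
         ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);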
3650 */ 3651 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3652 { 3653 PetscErrorCode ierr; 3654 PetscMPIInt rank,size; 3655 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3656 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3657 PetscBool allcolumns, colflag; 3658 Mat M,Mreuse; 3659 MatScalar *vwork,*aa; 3660 MPI_Comm comm; 3661 Mat_SeqAIJ *aij; 3662 3663 PetscFunctionBegin; 3664 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3665 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3666 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3667 3668 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3669 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3670 if (colflag && ncol == mat->cmap->N) { 3671 allcolumns = PETSC_TRUE; 3672 } else { 3673 allcolumns = PETSC_FALSE; 3674 } 3675 if (call == MAT_REUSE_MATRIX) { 3676 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3677 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3678 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3679 } else { 3680 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3681 } 3682 3683 /* 3684 m - number of local rows 3685 n - number of columns (same on all processors) 3686 rstart - first row in new global matrix generated 3687 */ 3688 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3689 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3690 if (call == MAT_INITIAL_MATRIX) { 3691 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3692 ii = aij->i; 3693 jj = aij->j; 3694 3695 /* 3696 Determine the number of non-zeros in the diagonal and off-diagonal 3697 portions of the matrix in order to do correct preallocation 3698 */ 3699 3700 /* first get start and end of "diagonal" columns */ 3701 if (csize == PETSC_DECIDE) { 3702 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3703 if (mglobal == n) { /* square matrix */ 3704 nlocal = m; 3705 } else { 3706 nlocal = n/size + ((n % size) > rank); 3707 } 3708 } else { 3709 nlocal = csize; 3710 } 3711 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3712 rstart = rend - nlocal; 3713 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3714 3715 /* next, compute all the lengths */ 3716 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3717 olens = dlens + m; 3718 for (i=0; i<m; i++) { 3719 jend = ii[i+1] - ii[i]; 3720 olen = 0; 3721 dlen = 0; 3722 for (j=0; j<jend; j++) { 3723 if (*jj < rstart || *jj >= rend) olen++; 3724 else dlen++; 3725 jj++; 3726 } 3727 olens[i] = olen; 3728 dlens[i] = dlen; 3729 } 3730 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3731 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3732 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3733 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3734 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3735 ierr = PetscFree(dlens);CHKERRQ(ierr); 3736 } else { 3737 PetscInt ml,nl; 3738 3739 M = *newmat; 3740 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3741 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3742 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3743 /* 
3744 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3745 rather than the slower MatSetValues(). 3746 */ 3747 M->was_assembled = PETSC_TRUE; 3748 M->assembled = PETSC_FALSE; 3749 } 3750 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3751 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3752 ii = aij->i; 3753 jj = aij->j; 3754 aa = aij->a; 3755 for (i=0; i<m; i++) { 3756 row = rstart + i; 3757 nz = ii[i+1] - ii[i]; 3758 cwork = jj; jj += nz; 3759 vwork = aa; aa += nz; 3760 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3761 } 3762 3763 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3764 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3765 *newmat = M; 3766 3767 /* save submatrix used in processor for next request */ 3768 if (call == MAT_INITIAL_MATRIX) { 3769 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3770 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3771 } 3772 PetscFunctionReturn(0); 3773 } 3774 3775 #undef __FUNCT__ 3776 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3777 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3778 { 3779 PetscInt m,cstart, cend,j,nnz,i,d; 3780 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3781 const PetscInt *JJ; 3782 PetscScalar *values; 3783 PetscErrorCode ierr; 3784 3785 PetscFunctionBegin; 3786 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3787 3788 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3789 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3790 m = B->rmap->n; 3791 cstart = B->cmap->rstart; 3792 cend = B->cmap->rend; 3793 rstart = B->rmap->rstart; 3794 3795 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3796 3797 #if defined(PETSC_USE_DEBUG) 3798 for (i=0; i<m; i++) { 3799 nnz = Ii[i+1]- Ii[i]; 3800 JJ = J + Ii[i]; 3801 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3802 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3803 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3804 } 3805 #endif 3806 3807 for (i=0; i<m; i++) { 3808 nnz = Ii[i+1]- Ii[i]; 3809 JJ = J + Ii[i]; 3810 nnz_max = PetscMax(nnz_max,nnz); 3811 d = 0; 3812 for (j=0; j<nnz; j++) { 3813 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3814 } 3815 d_nnz[i] = d; 3816 o_nnz[i] = nnz - d; 3817 } 3818 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3819 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3820 3821 if (v) values = (PetscScalar*)v; 3822 else { 3823 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3824 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3825 } 3826 3827 for (i=0; i<m; i++) { 3828 ii = i + rstart; 3829 nnz = Ii[i+1]- Ii[i]; 3830 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3831 } 3832 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3833 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3834 3835 if (!v) { 3836 ierr = PetscFree(values);CHKERRQ(ierr); 3837 } 3838 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3839 PetscFunctionReturn(0); 3840 } 3841 3842 #undef __FUNCT__ 3843 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3844 /*@ 3845 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3846 (the default parallel PETSc format). 3847 3848 Collective on MPI_Comm 3849 3850 Input Parameters: 3851 + B - the matrix 3852 . i - the indices into j for the start of each local row (starts with zero) 3853 . j - the column indices for each local row (starts with zero) 3854 - v - optional values in the matrix 3855 3856 Level: developer 3857 3858 Notes: 3859 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3860 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3861 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3862 3863 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3864 3865 The format which is used for the sparse matrix input, is equivalent to a 3866 row-major ordering.. i.e for the following matrix, the input data expected is 3867 as shown: 3868 3869 1 0 0 3870 2 0 3 P0 3871 ------- 3872 4 5 6 P1 3873 3874 Process0 [P0]: rows_owned=[0,1] 3875 i = {0,1,3} [size = nrow+1 = 2+1] 3876 j = {0,0,2} [size = nz = 6] 3877 v = {1,2,3} [size = nz = 6] 3878 3879 Process1 [P1]: rows_owned=[2] 3880 i = {0,3} [size = nrow+1 = 1+1] 3881 j = {0,1,2} [size = nz = 6] 3882 v = {4,5,6} [size = nz = 6] 3883 3884 .keywords: matrix, aij, compressed row, sparse, parallel 3885 3886 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3887 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3888 @*/ 3889 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3890 { 3891 PetscErrorCode ierr; 3892 3893 PetscFunctionBegin; 3894 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3895 PetscFunctionReturn(0); 3896 } 3897 3898 #undef __FUNCT__ 3899 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3900 /*@C 3901 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3902 (the default parallel PETSc format). For good matrix assembly performance 3903 the user should preallocate the matrix storage by setting the parameters 3904 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3905 performance can be increased by more than a factor of 50. 3906 3907 Collective on MPI_Comm 3908 3909 Input Parameters: 3910 + A - the matrix 3911 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3912 (same value is used for all local rows) 3913 . d_nnz - array containing the number of nonzeros in the various rows of the 3914 DIAGONAL portion of the local submatrix (possibly different for each row) 3915 or NULL, if d_nz is used to specify the nonzero structure. 3916 The size of this array is equal to the number of local rows, i.e 'm'. 
3917 For matrices that will be factored, you must leave room for (and set) 3918 the diagonal entry even if it is zero. 3919 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3920 submatrix (same value is used for all local rows). 3921 - o_nnz - array containing the number of nonzeros in the various rows of the 3922 OFF-DIAGONAL portion of the local submatrix (possibly different for 3923 each row) or NULL, if o_nz is used to specify the nonzero 3924 structure. The size of this array is equal to the number 3925 of local rows, i.e 'm'. 3926 3927 If the *_nnz parameter is given then the *_nz parameter is ignored 3928 3929 The AIJ format (also called the Yale sparse matrix format or 3930 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3931 storage. The stored row and column indices begin with zero. 3932 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3933 3934 The parallel matrix is partitioned such that the first m0 rows belong to 3935 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3936 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3937 3938 The DIAGONAL portion of the local submatrix of a processor can be defined 3939 as the submatrix which is obtained by extraction the part corresponding to 3940 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3941 first row that belongs to the processor, r2 is the last row belonging to 3942 the this processor, and c1-c2 is range of indices of the local part of a 3943 vector suitable for applying the matrix to. This is an mxn matrix. In the 3944 common case of a square matrix, the row and column ranges are the same and 3945 the DIAGONAL part is also square. The remaining portion of the local 3946 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3947 3948 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3949 3950 You can call MatGetInfo() to get information on how effective the preallocation was; 3951 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3952 You can also run with the option -info and look for messages with the string 3953 malloc in them to see if additional memory allocation was needed. 3954 3955 Example usage: 3956 3957 Consider the following 8x8 matrix with 34 non-zero values, that is 3958 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3959 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3960 as follows: 3961 3962 .vb 3963 1 2 0 | 0 3 0 | 0 4 3964 Proc0 0 5 6 | 7 0 0 | 8 0 3965 9 0 10 | 11 0 0 | 12 0 3966 ------------------------------------- 3967 13 0 14 | 15 16 17 | 0 0 3968 Proc1 0 18 0 | 19 20 21 | 0 0 3969 0 0 0 | 22 23 0 | 24 0 3970 ------------------------------------- 3971 Proc2 25 26 27 | 0 0 28 | 29 0 3972 30 0 0 | 31 32 33 | 0 34 3973 .ve 3974 3975 This can be represented as a collection of submatrices as: 3976 3977 .vb 3978 A B C 3979 D E F 3980 G H I 3981 .ve 3982 3983 Where the submatrices A,B,C are owned by proc0, D,E,F are 3984 owned by proc1, G,H,I are owned by proc2. 3985 3986 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3987 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3988 The 'M','N' parameters are 8,8, and have the same values on all procs. 3989 3990 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3991 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3992 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3993 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3994 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3995 matrix, ans [DF] as another SeqAIJ matrix. 3996 3997 When d_nz, o_nz parameters are specified, d_nz storage elements are 3998 allocated for every row of the local diagonal submatrix, and o_nz 3999 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4000 One way to choose d_nz and o_nz is to use the max nonzerors per local 4001 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4002 In this case, the values of d_nz,o_nz are: 4003 .vb 4004 proc0 : dnz = 2, o_nz = 2 4005 proc1 : dnz = 3, o_nz = 2 4006 proc2 : dnz = 1, o_nz = 4 4007 .ve 4008 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4009 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4010 for proc3. i.e we are using 12+15+10=37 storage locations to store 4011 34 values. 4012 4013 When d_nnz, o_nnz parameters are specified, the storage is specified 4014 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4015 In the above case the values for d_nnz,o_nnz are: 4016 .vb 4017 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4018 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4019 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4020 .ve 4021 Here the space allocated is sum of all the above values i.e 34, and 4022 hence pre-allocation is perfect. 4023 4024 Level: intermediate 4025 4026 .keywords: matrix, aij, compressed row, sparse, parallel 4027 4028 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4029 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4030 @*/ 4031 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4032 { 4033 PetscErrorCode ierr; 4034 4035 PetscFunctionBegin; 4036 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4037 PetscValidType(B,1); 4038 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4039 PetscFunctionReturn(0); 4040 } 4041 4042 #undef __FUNCT__ 4043 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4044 /*@ 4045 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4046 CSR format the local rows. 4047 4048 Collective on MPI_Comm 4049 4050 Input Parameters: 4051 + comm - MPI communicator 4052 . m - number of local rows (Cannot be PETSC_DECIDE) 4053 . n - This value should be the same as the local size used in creating the 4054 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4055 calculated if N is given) For square matrices n is almost always m. 4056 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4057 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4058 . i - row indices 4059 . j - column indices 4060 - a - matrix values 4061 4062 Output Parameter: 4063 . mat - the matrix 4064 4065 Level: intermediate 4066 4067 Notes: 4068 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4069 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4070 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
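       As a concrete sketch (it assumes the two-process layout of the 3 by 3 example drawn further
       below, with process 0 owning the first two rows), process 0 could call
.vb
       PetscInt    i[] = {0,1,3};
       PetscInt    j[] = {0,0,2};
       PetscScalar v[] = {1,2,3};
       Mat         A;

       ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve
       while process 1 makes the same (collective) call with m = 1 and its own i = {0,3}, j = {0,1,2}, v = {4,5,6}.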
4071 4072 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4073 4074 The format which is used for the sparse matrix input, is equivalent to a 4075 row-major ordering.. i.e for the following matrix, the input data expected is 4076 as shown: 4077 4078 1 0 0 4079 2 0 3 P0 4080 ------- 4081 4 5 6 P1 4082 4083 Process0 [P0]: rows_owned=[0,1] 4084 i = {0,1,3} [size = nrow+1 = 2+1] 4085 j = {0,0,2} [size = nz = 6] 4086 v = {1,2,3} [size = nz = 6] 4087 4088 Process1 [P1]: rows_owned=[2] 4089 i = {0,3} [size = nrow+1 = 1+1] 4090 j = {0,1,2} [size = nz = 6] 4091 v = {4,5,6} [size = nz = 6] 4092 4093 .keywords: matrix, aij, compressed row, sparse, parallel 4094 4095 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4096 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4097 @*/ 4098 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4099 { 4100 PetscErrorCode ierr; 4101 4102 PetscFunctionBegin; 4103 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4104 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4105 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4106 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4107 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4108 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4109 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4110 PetscFunctionReturn(0); 4111 } 4112 4113 #undef __FUNCT__ 4114 #define __FUNCT__ "MatCreateAIJ" 4115 /*@C 4116 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4117 (the default parallel PETSc format). For good matrix assembly performance 4118 the user should preallocate the matrix storage by setting the parameters 4119 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4120 performance can be increased by more than a factor of 50. 4121 4122 Collective on MPI_Comm 4123 4124 Input Parameters: 4125 + comm - MPI communicator 4126 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4127 This value should be the same as the local size used in creating the 4128 y vector for the matrix-vector product y = Ax. 4129 . n - This value should be the same as the local size used in creating the 4130 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4131 calculated if N is given) For square matrices n is almost always m. 4132 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4133 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4134 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4135 (same value is used for all local rows) 4136 . d_nnz - array containing the number of nonzeros in the various rows of the 4137 DIAGONAL portion of the local submatrix (possibly different for each row) 4138 or NULL, if d_nz is used to specify the nonzero structure. 4139 The size of this array is equal to the number of local rows, i.e 'm'. 4140 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4141 submatrix (same value is used for all local rows). 
4142 - o_nnz - array containing the number of nonzeros in the various rows of the 4143 OFF-DIAGONAL portion of the local submatrix (possibly different for 4144 each row) or NULL, if o_nz is used to specify the nonzero 4145 structure. The size of this array is equal to the number 4146 of local rows, i.e 'm'. 4147 4148 Output Parameter: 4149 . A - the matrix 4150 4151 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4152 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4153 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4154 4155 Notes: 4156 If the *_nnz parameter is given then the *_nz parameter is ignored 4157 4158 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4159 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4160 storage requirements for this matrix. 4161 4162 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4163 processor than it must be used on all processors that share the object for 4164 that argument. 4165 4166 The user MUST specify either the local or global matrix dimensions 4167 (possibly both). 4168 4169 The parallel matrix is partitioned across processors such that the 4170 first m0 rows belong to process 0, the next m1 rows belong to 4171 process 1, the next m2 rows belong to process 2 etc.. where 4172 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4173 values corresponding to [m x N] submatrix. 4174 4175 The columns are logically partitioned with the n0 columns belonging 4176 to 0th partition, the next n1 columns belonging to the next 4177 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4178 4179 The DIAGONAL portion of the local submatrix on any given processor 4180 is the submatrix corresponding to the rows and columns m,n 4181 corresponding to the given processor. i.e diagonal matrix on 4182 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4183 etc. The remaining portion of the local submatrix [m x (N-n)] 4184 constitute the OFF-DIAGONAL portion. The example below better 4185 illustrates this concept. 4186 4187 For a square global matrix we define each processor's diagonal portion 4188 to be its local rows and the corresponding columns (a square submatrix); 4189 each processor's off-diagonal portion encompasses the remainder of the 4190 local matrix (a rectangular submatrix). 4191 4192 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4193 4194 When calling this routine with a single process communicator, a matrix of 4195 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4196 type of communicator, use the construction mechanism: 4197 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4198 4199 By default, this format uses inodes (identical nodes) when possible. 4200 We search for consecutive rows with the same nonzero structure, thereby 4201 reusing matrix information to achieve increased efficiency. 4202 4203 Options Database Keys: 4204 + -mat_no_inode - Do not use inodes 4205 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4206 - -mat_aij_oneindex - Internally use indexing starting at 1 4207 rather than 0. Note that when calling MatSetValues(), 4208 the user still MUST index entries starting at 0! 4209 4210 4211 Example usage: 4212 4213 Consider the following 8x8 matrix with 34 non-zero values, that is 4214 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 4215 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4216 as follows: 4217 4218 .vb 4219 1 2 0 | 0 3 0 | 0 4 4220 Proc0 0 5 6 | 7 0 0 | 8 0 4221 9 0 10 | 11 0 0 | 12 0 4222 ------------------------------------- 4223 13 0 14 | 15 16 17 | 0 0 4224 Proc1 0 18 0 | 19 20 21 | 0 0 4225 0 0 0 | 22 23 0 | 24 0 4226 ------------------------------------- 4227 Proc2 25 26 27 | 0 0 28 | 29 0 4228 30 0 0 | 31 32 33 | 0 34 4229 .ve 4230 4231 This can be represented as a collection of submatrices as: 4232 4233 .vb 4234 A B C 4235 D E F 4236 G H I 4237 .ve 4238 4239 Where the submatrices A,B,C are owned by proc0, D,E,F are 4240 owned by proc1, G,H,I are owned by proc2. 4241 4242 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4243 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4244 The 'M','N' parameters are 8,8, and have the same values on all procs. 4245 4246 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4247 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4248 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4249 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4250 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4251 matrix, ans [DF] as another SeqAIJ matrix. 4252 4253 When d_nz, o_nz parameters are specified, d_nz storage elements are 4254 allocated for every row of the local diagonal submatrix, and o_nz 4255 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4256 One way to choose d_nz and o_nz is to use the max nonzerors per local 4257 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4258 In this case, the values of d_nz,o_nz are: 4259 .vb 4260 proc0 : dnz = 2, o_nz = 2 4261 proc1 : dnz = 3, o_nz = 2 4262 proc2 : dnz = 1, o_nz = 4 4263 .ve 4264 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4265 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4266 for proc3. i.e we are using 12+15+10=37 storage locations to store 4267 34 values. 4268 4269 When d_nnz, o_nnz parameters are specified, the storage is specified 4270 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4271 In the above case the values for d_nnz,o_nnz are: 4272 .vb 4273 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4274 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4275 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4276 .ve 4277 Here the space allocated is sum of all the above values i.e 34, and 4278 hence pre-allocation is perfect. 
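   As a sketch only, using the d_nnz/o_nnz values listed above for proc1 (each process passes its
   own local sizes and arrays in this collective call):
.vb
   Mat      A;
   PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};

   ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   Because d_nnz and o_nnz are supplied here, the scalar d_nz and o_nz arguments (given as 0) are ignored.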
4279 4280 Level: intermediate 4281 4282 .keywords: matrix, aij, compressed row, sparse, parallel 4283 4284 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4285 MPIAIJ, MatCreateMPIAIJWithArrays() 4286 @*/ 4287 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4288 { 4289 PetscErrorCode ierr; 4290 PetscMPIInt size; 4291 4292 PetscFunctionBegin; 4293 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4294 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4295 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4296 if (size > 1) { 4297 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4298 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4299 } else { 4300 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4301 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4302 } 4303 PetscFunctionReturn(0); 4304 } 4305 4306 #undef __FUNCT__ 4307 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4308 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4309 { 4310 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4311 4312 PetscFunctionBegin; 4313 *Ad = a->A; 4314 *Ao = a->B; 4315 *colmap = a->garray; 4316 PetscFunctionReturn(0); 4317 } 4318 4319 #undef __FUNCT__ 4320 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4321 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4322 { 4323 PetscErrorCode ierr; 4324 PetscInt i; 4325 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4326 4327 PetscFunctionBegin; 4328 if (coloring->ctype == IS_COLORING_GLOBAL) { 4329 ISColoringValue *allcolors,*colors; 4330 ISColoring ocoloring; 4331 4332 /* set coloring for diagonal portion */ 4333 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4334 4335 /* set coloring for off-diagonal portion */ 4336 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4337 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4338 for (i=0; i<a->B->cmap->n; i++) { 4339 colors[i] = allcolors[a->garray[i]]; 4340 } 4341 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4342 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4343 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4344 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4345 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4346 ISColoringValue *colors; 4347 PetscInt *larray; 4348 ISColoring ocoloring; 4349 4350 /* set coloring for diagonal portion */ 4351 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4352 for (i=0; i<a->A->cmap->n; i++) { 4353 larray[i] = i + A->cmap->rstart; 4354 } 4355 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4356 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4357 for (i=0; i<a->A->cmap->n; i++) { 4358 colors[i] = coloring->colors[larray[i]]; 4359 } 4360 ierr = PetscFree(larray);CHKERRQ(ierr); 4361 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4362 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4363 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4364 4365 /* set coloring for off-diagonal portion */ 4366 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4367 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4368 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4369 for (i=0; i<a->B->cmap->n; i++) { 4370 colors[i] = coloring->colors[larray[i]]; 4371 } 4372 ierr = PetscFree(larray);CHKERRQ(ierr); 4373 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4374 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4375 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4376 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4377 PetscFunctionReturn(0); 4378 } 4379 4380 #undef __FUNCT__ 4381 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4382 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4383 { 4384 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4385 PetscErrorCode ierr; 4386 4387 PetscFunctionBegin; 4388 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4389 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4390 PetscFunctionReturn(0); 4391 } 4392 4393 #undef __FUNCT__ 4394 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4395 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4396 { 4397 PetscErrorCode ierr; 4398 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4399 PetscInt *indx; 4400 4401 PetscFunctionBegin; 4402 /* This routine will ONLY return MPIAIJ type matrix */ 4403 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4404 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4405 if (n == PETSC_DECIDE) { 4406 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4407 } 4408 /* Check sum(n) = N */ 4409 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4410 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4411 4412 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4413 rstart -= m; 4414 4415 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4416 for (i=0; i<m; i++) { 4417 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4418 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4419 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4420 } 4421 4422 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4423 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4424 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4425 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4426 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4427 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4428 PetscFunctionReturn(0); 4429 } 4430 4431 #undef __FUNCT__ 4432 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4433 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4434 { 4435 PetscErrorCode ierr; 4436 PetscInt m,N,i,rstart,nnz,Ii; 4437 PetscInt *indx; 4438 PetscScalar *values; 4439 4440 PetscFunctionBegin; 4441 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4442 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4443 for (i=0; i<m; i++) { 4444 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4445 Ii = i + rstart; 4446 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4447 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4448 } 4449 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4450 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4451 PetscFunctionReturn(0); 4452 } 4453 4454 #undef __FUNCT__ 4455 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4456 /*@ 4457 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4458 matrices from each processor 4459 4460 Collective on MPI_Comm 4461 4462 Input Parameters: 4463 + comm - the communicators the parallel matrix will live on 4464 . inmat - the input sequential matrices 4465 . n - number of local columns (or PETSC_DECIDE) 4466 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4467 4468 Output Parameter: 4469 . outmat - the parallel matrix generated 4470 4471 Level: advanced 4472 4473 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4474 4475 @*/ 4476 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4477 { 4478 PetscErrorCode ierr; 4479 4480 PetscFunctionBegin; 4481 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4482 if (scall == MAT_INITIAL_MATRIX) { 4483 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4484 } 4485 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4486 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4487 PetscFunctionReturn(0); 4488 } 4489 4490 #undef __FUNCT__ 4491 #define __FUNCT__ "MatFileSplit" 4492 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4493 { 4494 PetscErrorCode ierr; 4495 PetscMPIInt rank; 4496 PetscInt m,N,i,rstart,nnz; 4497 size_t len; 4498 const PetscInt *indx; 4499 PetscViewer out; 4500 char *name; 4501 Mat B; 4502 const PetscScalar *values; 4503 4504 PetscFunctionBegin; 4505 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4506 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4507 /* Should this be the type of the diagonal block of A? 
*/ 4508 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4509 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4510 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4511 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4512 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4513 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4514 for (i=0; i<m; i++) { 4515 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4516 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4517 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4518 } 4519 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4520 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4521 4522 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4523 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4524 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4525 sprintf(name,"%s.%d",outfile,rank); 4526 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4527 ierr = PetscFree(name);CHKERRQ(ierr); 4528 ierr = MatView(B,out);CHKERRQ(ierr); 4529 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4530 ierr = MatDestroy(&B);CHKERRQ(ierr); 4531 PetscFunctionReturn(0); 4532 } 4533 4534 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4535 #undef __FUNCT__ 4536 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4537 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4538 { 4539 PetscErrorCode ierr; 4540 Mat_Merge_SeqsToMPI *merge; 4541 PetscContainer container; 4542 4543 PetscFunctionBegin; 4544 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4545 if (container) { 4546 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4547 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4548 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4549 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4550 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4551 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4552 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4553 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4554 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4555 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4556 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4557 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4558 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4559 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4560 ierr = PetscFree(merge);CHKERRQ(ierr); 4561 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4562 } 4563 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4564 PetscFunctionReturn(0); 4565 } 4566 4567 #include <../src/mat/utils/freespace.h> 4568 #include <petscbt.h> 4569 4570 #undef __FUNCT__ 4571 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4572 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4573 { 4574 PetscErrorCode ierr; 4575 MPI_Comm comm; 4576 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4577 PetscMPIInt size,rank,taga,*len_s; 4578 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4579 PetscInt proc,m; 4580 PetscInt **buf_ri,**buf_rj; 4581 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4582 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4583 MPI_Request *s_waits,*r_waits; 4584 MPI_Status *status; 4585 MatScalar *aa=a->a; 4586 MatScalar **abuf_r,*ba_i; 4587 Mat_Merge_SeqsToMPI *merge; 4588 PetscContainer container; 4589 4590 PetscFunctionBegin; 4591 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4592 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4593 4594 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4595 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4596 4597 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4598 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4599 4600 bi = merge->bi; 4601 bj = merge->bj; 4602 buf_ri = merge->buf_ri; 4603 buf_rj = merge->buf_rj; 4604 4605 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4606 owners = merge->rowmap->range; 4607 len_s = merge->len_s; 4608 4609 /* send and recv matrix values */ 4610 /*-----------------------------*/ 4611 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4612 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4613 4614 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4615 for (proc=0,k=0; proc<size; proc++) { 4616 if (!len_s[proc]) continue; 4617 i = owners[proc]; 4618 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4619 k++; 4620 } 4621 4622 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4623 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4624 ierr = PetscFree(status);CHKERRQ(ierr); 4625 4626 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4627 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4628 4629 /* insert mat values of mpimat */ 4630 /*----------------------------*/ 4631 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4632 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4633 4634 for (k=0; k<merge->nrecv; k++) { 4635 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4636 nrows = *(buf_ri_k[k]); 4637 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4638 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4639 } 4640 4641 /* set values of ba */ 4642 m = merge->rowmap->n; 4643 for (i=0; i<m; i++) { 4644 arow = owners[rank] + i; 4645 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4646 bnzi = bi[i+1] - bi[i]; 4647 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4648 4649 /* add local non-zero vals of this proc's seqmat into ba */ 4650 anzi = ai[arow+1] - ai[arow]; 4651 aj = a->j + ai[arow]; 4652 aa = a->a + ai[arow]; 4653 nextaj = 0; 4654 for (j=0; nextaj<anzi; j++) { 4655 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4656 ba_i[j] += aa[nextaj++]; 4657 } 4658 } 4659 4660 /* add received vals into ba */ 4661 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4662 /* i-th row */ 4663 if (i == *nextrow[k]) { 4664 anzi = *(nextai[k]+1) - *nextai[k]; 4665 aj = buf_rj[k] + *(nextai[k]); 4666 aa = abuf_r[k] + *(nextai[k]); 4667 nextaj = 0; 4668 for (j=0; nextaj<anzi; j++) { 4669 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4670 ba_i[j] += aa[nextaj++]; 4671 } 4672 } 4673 nextrow[k]++; nextai[k]++; 4674 } 4675 } 4676 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4677 } 4678 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4679 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4680 4681 ierr = 
PetscFree(abuf_r[0]);CHKERRQ(ierr); 4682 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4683 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4684 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4685 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4686 PetscFunctionReturn(0); 4687 } 4688 4689 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4690 4691 #undef __FUNCT__ 4692 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4693 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4694 { 4695 PetscErrorCode ierr; 4696 Mat B_mpi; 4697 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4698 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4699 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4700 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4701 PetscInt len,proc,*dnz,*onz,bs,cbs; 4702 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4703 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4704 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4705 MPI_Status *status; 4706 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4707 PetscBT lnkbt; 4708 Mat_Merge_SeqsToMPI *merge; 4709 PetscContainer container; 4710 4711 PetscFunctionBegin; 4712 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4713 4714 /* make sure it is a PETSc comm */ 4715 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4716 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4717 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4718 4719 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4720 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4721 4722 /* determine row ownership */ 4723 /*---------------------------------------------------------*/ 4724 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4725 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4726 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4727 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4728 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4729 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4730 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4731 4732 m = merge->rowmap->n; 4733 owners = merge->rowmap->range; 4734 4735 /* determine the number of messages to send, their lengths */ 4736 /*---------------------------------------------------------*/ 4737 len_s = merge->len_s; 4738 4739 len = 0; /* length of buf_si[] */ 4740 merge->nsend = 0; 4741 for (proc=0; proc<size; proc++) { 4742 len_si[proc] = 0; 4743 if (proc == rank) { 4744 len_s[proc] = 0; 4745 } else { 4746 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4747 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4748 } 4749 if (len_s[proc]) { 4750 merge->nsend++; 4751 nrows = 0; 4752 for (i=owners[proc]; i<owners[proc+1]; i++) { 4753 if (ai[i+1] > ai[i]) nrows++; 4754 } 4755 len_si[proc] = 2*(nrows+1); 4756 len += len_si[proc]; 4757 } 4758 } 4759 4760 /* determine the number and length of messages to receive for ij-structure */ 4761 /*-------------------------------------------------------------------------*/ 4762 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4763 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4764 4765 /* post the Irecv of j-structure */ 4766 
/*-------------------------------*/ 4767 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4768 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4769 4770 /* post the Isend of j-structure */ 4771 /*--------------------------------*/ 4772 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4773 4774 for (proc=0, k=0; proc<size; proc++) { 4775 if (!len_s[proc]) continue; 4776 i = owners[proc]; 4777 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4778 k++; 4779 } 4780 4781 /* receives and sends of j-structure are complete */ 4782 /*------------------------------------------------*/ 4783 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4784 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4785 4786 /* send and recv i-structure */ 4787 /*---------------------------*/ 4788 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4789 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4790 4791 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4792 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4793 for (proc=0,k=0; proc<size; proc++) { 4794 if (!len_s[proc]) continue; 4795 /* form outgoing message for i-structure: 4796 buf_si[0]: nrows to be sent 4797 [1:nrows]: row index (global) 4798 [nrows+1:2*nrows+1]: i-structure index 4799 */ 4800 /*-------------------------------------------*/ 4801 nrows = len_si[proc]/2 - 1; 4802 buf_si_i = buf_si + nrows+1; 4803 buf_si[0] = nrows; 4804 buf_si_i[0] = 0; 4805 nrows = 0; 4806 for (i=owners[proc]; i<owners[proc+1]; i++) { 4807 anzi = ai[i+1] - ai[i]; 4808 if (anzi) { 4809 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4810 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4811 nrows++; 4812 } 4813 } 4814 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4815 k++; 4816 buf_si += len_si[proc]; 4817 } 4818 4819 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4820 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4821 4822 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4823 for (i=0; i<merge->nrecv; i++) { 4824 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4825 } 4826 4827 ierr = PetscFree(len_si);CHKERRQ(ierr); 4828 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4829 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4830 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4831 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4832 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4833 ierr = PetscFree(status);CHKERRQ(ierr); 4834 4835 /* compute a local seq matrix in each processor */ 4836 /*----------------------------------------------*/ 4837 /* allocate bi array and free space for accumulating nonzero column info */ 4838 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4839 bi[0] = 0; 4840 4841 /* create and initialize a linked list */ 4842 nlnk = N+1; 4843 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4844 4845 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4846 len = ai[owners[rank+1]] - ai[owners[rank]]; 4847 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4848 4849 
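  /* current_space walks the free-space list: the merged (sorted, duplicate-free) column indices of each local row are appended to current_space->array below, and later compacted into bj by PetscFreeSpaceContiguous() */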
current_space = free_space; 4850 4851 /* determine symbolic info for each local row */ 4852 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4853 4854 for (k=0; k<merge->nrecv; k++) { 4855 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4856 nrows = *buf_ri_k[k]; 4857 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4858 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4859 } 4860 4861 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4862 len = 0; 4863 for (i=0; i<m; i++) { 4864 bnzi = 0; 4865 /* add local non-zero cols of this proc's seqmat into lnk */ 4866 arow = owners[rank] + i; 4867 anzi = ai[arow+1] - ai[arow]; 4868 aj = a->j + ai[arow]; 4869 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4870 bnzi += nlnk; 4871 /* add received col data into lnk */ 4872 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4873 if (i == *nextrow[k]) { /* i-th row */ 4874 anzi = *(nextai[k]+1) - *nextai[k]; 4875 aj = buf_rj[k] + *nextai[k]; 4876 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4877 bnzi += nlnk; 4878 nextrow[k]++; nextai[k]++; 4879 } 4880 } 4881 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4882 4883 /* if free space is not available, make more free space */ 4884 if (current_space->local_remaining<bnzi) { 4885 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4886 nspacedouble++; 4887 } 4888 /* copy data into free space, then initialize lnk */ 4889 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4890 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4891 4892 current_space->array += bnzi; 4893 current_space->local_used += bnzi; 4894 current_space->local_remaining -= bnzi; 4895 4896 bi[i+1] = bi[i] + bnzi; 4897 } 4898 4899 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4900 4901 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4902 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4903 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4904 4905 /* create symbolic parallel matrix B_mpi */ 4906 /*---------------------------------------*/ 4907 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4908 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4909 if (n==PETSC_DECIDE) { 4910 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4911 } else { 4912 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4913 } 4914 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4915 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4916 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4917 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4918 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4919 4920 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4921 B_mpi->assembled = PETSC_FALSE; 4922 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4923 merge->bi = bi; 4924 merge->bj = bj; 4925 merge->buf_ri = buf_ri; 4926 merge->buf_rj = buf_rj; 4927 merge->coi = NULL; 4928 merge->coj = NULL; 4929 merge->owners_co = NULL; 4930 4931 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4932 4933 /* attach the supporting struct to B_mpi for reuse */ 4934 ierr =
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4935 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4936 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4937 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4938 *mpimat = B_mpi; 4939 4940 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4941 PetscFunctionReturn(0); 4942 } 4943 4944 #undef __FUNCT__ 4945 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4946 /*@C 4947 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential 4948 matrices from each processor 4949 4950 Collective on MPI_Comm 4951 4952 Input Parameters: 4953 + comm - the communicator the parallel matrix will live on 4954 . seqmat - the input sequential matrix (one per process) 4955 . m - number of local rows (or PETSC_DECIDE) 4956 . n - number of local columns (or PETSC_DECIDE) 4957 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4958 4959 Output Parameter: 4960 . mpimat - the parallel matrix generated 4961 4962 Level: advanced 4963 4964 Notes: 4965 The dimensions of the sequential matrix in each processor MUST be the same. 4966 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4967 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4968 @*/ 4969 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4970 { 4971 PetscErrorCode ierr; 4972 PetscMPIInt size; 4973 4974 PetscFunctionBegin; 4975 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4976 if (size == 1) { 4977 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4978 if (scall == MAT_INITIAL_MATRIX) { 4979 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4980 } else { 4981 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4982 } 4983 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4984 PetscFunctionReturn(0); 4985 } 4986 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4987 if (scall == MAT_INITIAL_MATRIX) { 4988 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4989 } 4990 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4991 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4992 PetscFunctionReturn(0); 4993 } 4994 4995 #undef __FUNCT__ 4996 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4997 /*@ 4998 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4999 mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained 5000 with MatGetSize() 5001 5002 Not Collective 5003 5004 Input Parameters: 5005 + A - the matrix 5006 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5007 5008 Output Parameter: 5009 .
A_loc - the local sequential matrix generated 5010 5011 Level: developer 5012 5013 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5014 5015 @*/ 5016 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5017 { 5018 PetscErrorCode ierr; 5019 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5020 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5021 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5022 MatScalar *aa=a->a,*ba=b->a,*cam; 5023 PetscScalar *ca; 5024 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5025 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5026 PetscBool match; 5027 5028 PetscFunctionBegin; 5029 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5030 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5031 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5032 if (scall == MAT_INITIAL_MATRIX) { 5033 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5034 ci[0] = 0; 5035 for (i=0; i<am; i++) { 5036 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5037 } 5038 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5039 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5040 k = 0; 5041 for (i=0; i<am; i++) { 5042 ncols_o = bi[i+1] - bi[i]; 5043 ncols_d = ai[i+1] - ai[i]; 5044 /* off-diagonal portion of A */ 5045 for (jo=0; jo<ncols_o; jo++) { 5046 col = cmap[*bj]; 5047 if (col >= cstart) break; 5048 cj[k] = col; bj++; 5049 ca[k++] = *ba++; 5050 } 5051 /* diagonal portion of A */ 5052 for (j=0; j<ncols_d; j++) { 5053 cj[k] = cstart + *aj++; 5054 ca[k++] = *aa++; 5055 } 5056 /* off-diagonal portion of A */ 5057 for (j=jo; j<ncols_o; j++) { 5058 cj[k] = cmap[*bj++]; 5059 ca[k++] = *ba++; 5060 } 5061 } 5062 /* put together the new matrix */ 5063 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5064 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5065 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5066 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5067 mat->free_a = PETSC_TRUE; 5068 mat->free_ij = PETSC_TRUE; 5069 mat->nonew = 0; 5070 } else if (scall == MAT_REUSE_MATRIX) { 5071 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5072 ci = mat->i; cj = mat->j; cam = mat->a; 5073 for (i=0; i<am; i++) { 5074 /* off-diagonal portion of A */ 5075 ncols_o = bi[i+1] - bi[i]; 5076 for (jo=0; jo<ncols_o; jo++) { 5077 col = cmap[*bj]; 5078 if (col >= cstart) break; 5079 *cam++ = *ba++; bj++; 5080 } 5081 /* diagonal portion of A */ 5082 ncols_d = ai[i+1] - ai[i]; 5083 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5084 /* off-diagonal portion of A */ 5085 for (j=jo; j<ncols_o; j++) { 5086 *cam++ = *ba++; bj++; 5087 } 5088 } 5089 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5090 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5091 PetscFunctionReturn(0); 5092 } 5093 5094 #undef __FUNCT__ 5095 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5096 /*@C 5097 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5098 5099 Not Collective 5100 5101 Input Parameters: 5102 + A - the matrix 5103 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5104 - row, col - index sets of rows and columns to extract (or NULL) 5105 5106 Output Parameter: 5107 . 
A_loc - the local sequential matrix generated 5108 5109 Level: developer 5110 5111 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5112 5113 @*/ 5114 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5115 { 5116 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5117 PetscErrorCode ierr; 5118 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5119 IS isrowa,iscola; 5120 Mat *aloc; 5121 PetscBool match; 5122 5123 PetscFunctionBegin; 5124 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5125 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5126 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5127 if (!row) { 5128 start = A->rmap->rstart; end = A->rmap->rend; 5129 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5130 } else { 5131 isrowa = *row; 5132 } 5133 if (!col) { 5134 start = A->cmap->rstart; 5135 cmap = a->garray; 5136 nzA = a->A->cmap->n; 5137 nzB = a->B->cmap->n; 5138 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5139 ncols = 0; 5140 for (i=0; i<nzB; i++) { 5141 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5142 else break; 5143 } 5144 imark = i; 5145 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5146 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5147 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5148 } else { 5149 iscola = *col; 5150 } 5151 if (scall != MAT_INITIAL_MATRIX) { 5152 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5153 aloc[0] = *A_loc; 5154 } 5155 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5156 *A_loc = aloc[0]; 5157 ierr = PetscFree(aloc);CHKERRQ(ierr); 5158 if (!row) { 5159 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5160 } 5161 if (!col) { 5162 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5163 } 5164 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5165 PetscFunctionReturn(0); 5166 } 5167 5168 #undef __FUNCT__ 5169 #define __FUNCT__ "MatGetBrowsOfAcols" 5170 /*@C 5171 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5172 5173 Collective on Mat 5174 5175 Input Parameters: 5176 + A,B - the matrices in mpiaij format 5177 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5178 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5179 5180 Output Parameter: 5181 + rowb, colb - index sets of rows and columns of B to extract 5182 - B_seq - the sequential matrix generated 5183 5184 Level: developer 5185 5186 @*/ 5187 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5188 { 5189 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5190 PetscErrorCode ierr; 5191 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5192 IS isrowb,iscolb; 5193 Mat *bseq=NULL; 5194 5195 PetscFunctionBegin; 5196 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5197 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5198 } 5199 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5200 5201 if (scall == MAT_INITIAL_MATRIX) { 5202 start = A->cmap->rstart; 5203 cmap = a->garray; 5204 nzA = a->A->cmap->n; 5205 nzB = a->B->cmap->n; 5206 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5207 ncols = 0; 5208 for (i=0; i<nzB; i++) { /* row < local row index */ 5209 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5210 else break; 5211 } 5212 imark = i; 5213 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5214 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5215 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5216 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5217 } else { 5218 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5219 isrowb = *rowb; iscolb = *colb; 5220 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5221 bseq[0] = *B_seq; 5222 } 5223 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5224 *B_seq = bseq[0]; 5225 ierr = PetscFree(bseq);CHKERRQ(ierr); 5226 if (!rowb) { 5227 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5228 } else { 5229 *rowb = isrowb; 5230 } 5231 if (!colb) { 5232 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5233 } else { 5234 *colb = iscolb; 5235 } 5236 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5237 PetscFunctionReturn(0); 5238 } 5239 5240 #undef __FUNCT__ 5241 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5242 /* 5243 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5244 of the OFF-DIAGONAL portion of local A 5245 5246 Collective on Mat 5247 5248 Input Parameters: 5249 + A,B - the matrices in mpiaij format 5250 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5251 5252 Output Parameter: 5253 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5254 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5255 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5256 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5257 5258 Level: developer 5259 5260 */ 5261 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5262 { 5263 VecScatter_MPI_General *gen_to,*gen_from; 5264 PetscErrorCode ierr; 5265 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5266 Mat_SeqAIJ *b_oth; 5267 VecScatter ctx =a->Mvctx; 5268 MPI_Comm comm; 5269 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5270 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5271 PetscScalar *rvalues,*svalues; 5272 MatScalar *b_otha,*bufa,*bufA; 5273 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5274 MPI_Request *rwaits = NULL,*swaits = NULL; 5275 MPI_Status *sstatus,rstatus; 5276 PetscMPIInt jj; 5277 PetscInt *cols,sbs,rbs; 5278 PetscScalar *vals; 5279 5280 PetscFunctionBegin; 5281 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5282 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5283 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5284 } 5285 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5286 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5287 5288 gen_to = (VecScatter_MPI_General*)ctx->todata; 5289 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5290 rvalues = gen_from->values; /* holds the length of receiving row */ 5291 svalues = gen_to->values; /* holds the length of sending row */ 5292 nrecvs = gen_from->n; 5293 nsends = gen_to->n; 5294 5295 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5296 srow = gen_to->indices; /* local row index to be sent */ 5297 sstarts = gen_to->starts; 5298 sprocs = gen_to->procs; 5299 sstatus = gen_to->sstatus; 5300 sbs = gen_to->bs; 5301 rstarts = gen_from->starts; 5302 rprocs = gen_from->procs; 5303 rbs = gen_from->bs; 5304 5305 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5306 if (scall == MAT_INITIAL_MATRIX) { 5307 /* i-array */ 5308 /*---------*/ 5309 /* post receives */ 5310 for (i=0; i<nrecvs; i++) { 5311 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5312 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5313 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5314 } 5315 5316 /* pack the outgoing message */ 5317 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5318 5319 sstartsj[0] = 0; 5320 rstartsj[0] = 0; 5321 len = 0; /* total length of j or a array to be sent */ 5322 k = 0; 5323 for (i=0; i<nsends; i++) { 5324 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5325 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5326 for (j=0; j<nrows; j++) { 5327 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5328 for (l=0; l<sbs; l++) { 5329 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5330 5331 rowlen[j*sbs+l] = ncols; 5332 5333 len += ncols; 5334 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5335 } 5336 k++; 5337 } 5338 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5339 5340 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in 
bufj and bufa */ 5341 } 5342 /* recvs and sends of i-array are completed */ 5343 i = nrecvs; 5344 while (i--) { 5345 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5346 } 5347 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5348 5349 /* allocate buffers for sending j and a arrays */ 5350 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5351 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5352 5353 /* create i-array of B_oth */ 5354 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5355 5356 b_othi[0] = 0; 5357 len = 0; /* total length of j or a array to be received */ 5358 k = 0; 5359 for (i=0; i<nrecvs; i++) { 5360 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5361 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5362 for (j=0; j<nrows; j++) { 5363 b_othi[k+1] = b_othi[k] + rowlen[j]; 5364 len += rowlen[j]; k++; 5365 } 5366 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5367 } 5368 5369 /* allocate space for j and a arrrays of B_oth */ 5370 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5371 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5372 5373 /* j-array */ 5374 /*---------*/ 5375 /* post receives of j-array */ 5376 for (i=0; i<nrecvs; i++) { 5377 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5378 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5379 } 5380 5381 /* pack the outgoing message j-array */ 5382 k = 0; 5383 for (i=0; i<nsends; i++) { 5384 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5385 bufJ = bufj+sstartsj[i]; 5386 for (j=0; j<nrows; j++) { 5387 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5388 for (ll=0; ll<sbs; ll++) { 5389 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5390 for (l=0; l<ncols; l++) { 5391 *bufJ++ = cols[l]; 5392 } 5393 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5394 } 5395 } 5396 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5397 } 5398 5399 /* recvs and sends of j-array are completed */ 5400 i = nrecvs; 5401 while (i--) { 5402 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5403 } 5404 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5405 } else if (scall == MAT_REUSE_MATRIX) { 5406 sstartsj = *startsj_s; 5407 rstartsj = *startsj_r; 5408 bufa = *bufa_ptr; 5409 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5410 b_otha = b_oth->a; 5411 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5412 5413 /* a-array */ 5414 /*---------*/ 5415 /* post receives of a-array */ 5416 for (i=0; i<nrecvs; i++) { 5417 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5418 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5419 } 5420 5421 /* pack the outgoing message a-array */ 5422 k = 0; 5423 for (i=0; i<nsends; i++) { 5424 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5425 bufA = bufa+sstartsj[i]; 5426 for (j=0; j<nrows; j++) { 5427 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5428 for (ll=0; ll<sbs; ll++) { 5429 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5430 for (l=0; l<ncols; l++) { 5431 *bufA++ = vals[l]; 5432 } 5433 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5434 } 5435 } 5436 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5437 } 5438 /* recvs and sends of a-array are completed */ 5439 i = nrecvs; 5440 while (i--) { 5441 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5442 } 5443 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5444 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5445 5446 if (scall == MAT_INITIAL_MATRIX) { 5447 /* put together the new matrix */ 5448 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5449 5450 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5451 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5452 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5453 b_oth->free_a = PETSC_TRUE; 5454 b_oth->free_ij = PETSC_TRUE; 5455 b_oth->nonew = 0; 5456 5457 ierr = PetscFree(bufj);CHKERRQ(ierr); 5458 if (!startsj_s || !bufa_ptr) { 5459 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5460 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5461 } else { 5462 *startsj_s = sstartsj; 5463 *startsj_r = rstartsj; 5464 *bufa_ptr = bufa; 5465 } 5466 } 5467 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5468 PetscFunctionReturn(0); 5469 } 5470 5471 #undef __FUNCT__ 5472 #define __FUNCT__ "MatGetCommunicationStructs" 5473 /*@C 5474 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5475 5476 Not Collective 5477 5478 Input Parameters: 5479 . A - The matrix in mpiaij format 5480 5481 Output Parameter: 5482 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5483 . 
colmap - A map from global column index to local index into lvec 5484 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5485 5486 Level: developer 5487 5488 @*/ 5489 #if defined(PETSC_USE_CTABLE) 5490 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5491 #else 5492 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5493 #endif 5494 { 5495 Mat_MPIAIJ *a; 5496 5497 PetscFunctionBegin; 5498 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5499 PetscValidPointer(lvec, 2); 5500 PetscValidPointer(colmap, 3); 5501 PetscValidPointer(multScatter, 4); 5502 a = (Mat_MPIAIJ*) A->data; 5503 if (lvec) *lvec = a->lvec; 5504 if (colmap) *colmap = a->colmap; 5505 if (multScatter) *multScatter = a->Mvctx; 5506 PetscFunctionReturn(0); 5507 } 5508 5509 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5510 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5511 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5512 5513 #undef __FUNCT__ 5514 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5515 /* 5516 Computes (B'*A')' since computing B*A directly is untenable 5517 5518 n p p 5519 ( ) ( ) ( ) 5520 m ( A ) * n ( B ) = m ( C ) 5521 ( ) ( ) ( ) 5522 5523 */ 5524 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5525 { 5526 PetscErrorCode ierr; 5527 Mat At,Bt,Ct; 5528 5529 PetscFunctionBegin; 5530 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5531 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5532 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5533 ierr = MatDestroy(&At);CHKERRQ(ierr); 5534 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5535 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5536 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5537 PetscFunctionReturn(0); 5538 } 5539 5540 #undef __FUNCT__ 5541 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5542 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5543 { 5544 PetscErrorCode ierr; 5545 PetscInt m=A->rmap->n,n=B->cmap->n; 5546 Mat Cmat; 5547 5548 PetscFunctionBegin; 5549 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5550 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5551 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5552 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5553 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5554 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5555 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5556 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5557 5558 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5559 5560 *C = Cmat; 5561 PetscFunctionReturn(0); 5562 } 5563 5564 /* ----------------------------------------------------------------*/ 5565 #undef __FUNCT__ 5566 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5567 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5568 { 5569 PetscErrorCode ierr; 5570 5571 PetscFunctionBegin; 5572 if (scall == MAT_INITIAL_MATRIX) { 5573 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5574 } 5575 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5576 
PetscFunctionReturn(0); 5577 } 5578 5579 #if defined(PETSC_HAVE_MUMPS) 5580 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5581 #endif 5582 #if defined(PETSC_HAVE_PASTIX) 5583 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5584 #endif 5585 #if defined(PETSC_HAVE_SUPERLU_DIST) 5586 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5587 #endif 5588 #if defined(PETSC_HAVE_CLIQUE) 5589 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5590 #endif 5591 5592 /*MC 5593 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5594 5595 Options Database Keys: 5596 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5597 5598 Level: beginner 5599 5600 .seealso: MatCreateAIJ() 5601 M*/ 5602 5603 #undef __FUNCT__ 5604 #define __FUNCT__ "MatCreate_MPIAIJ" 5605 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5606 { 5607 Mat_MPIAIJ *b; 5608 PetscErrorCode ierr; 5609 PetscMPIInt size; 5610 5611 PetscFunctionBegin; 5612 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5613 5614 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5615 B->data = (void*)b; 5616 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5617 B->assembled = PETSC_FALSE; 5618 B->insertmode = NOT_SET_VALUES; 5619 b->size = size; 5620 5621 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5622 5623 /* build cache for off array entries formed */ 5624 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5625 5626 b->donotstash = PETSC_FALSE; 5627 b->colmap = 0; 5628 b->garray = 0; 5629 b->roworiented = PETSC_TRUE; 5630 5631 /* stuff used for matrix vector multiply */ 5632 b->lvec = NULL; 5633 b->Mvctx = NULL; 5634 5635 /* stuff for MatGetRow() */ 5636 b->rowindices = 0; 5637 b->rowvalues = 0; 5638 b->getrowactive = PETSC_FALSE; 5639 5640 /* flexible pointer used in CUSP/CUSPARSE classes */ 5641 b->spptr = NULL; 5642 5643 #if defined(PETSC_HAVE_MUMPS) 5644 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C","MatGetFactor_aij_mumps",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5645 #endif 5646 #if defined(PETSC_HAVE_PASTIX) 5647 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C","MatGetFactor_mpiaij_pastix",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5648 #endif 5649 #if defined(PETSC_HAVE_SUPERLU_DIST) 5650 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C","MatGetFactor_mpiaij_superlu_dist",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5651 #endif 5652 #if defined(PETSC_HAVE_CLIQUE) 5653 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C","MatGetFactor_aij_clique",MatGetFactor_aij_clique);CHKERRQ(ierr); 5654 #endif 5655 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIAIJ",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5656 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIAIJ",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5657 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIAIJ",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5658 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_MPIAIJ",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5659 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C","MatMPIAIJSetPreallocation_MPIAIJ",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5660 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C","MatMPIAIJSetPreallocationCSR_MPIAIJ",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5661 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIAIJ",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5662 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C","MatConvert_MPIAIJ_MPIAIJPERM",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C","MatConvert_MPIAIJ_MPIAIJCRL",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C","MatConvert_MPIAIJ_MPISBAIJ",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C","MatMatMult_MPIDense_MPIAIJ",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C","MatMatMultSymbolic_MPIDense_MPIAIJ",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C","MatMatMultNumeric_MPIDense_MPIAIJ",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5668 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5669 PetscFunctionReturn(0); 5670 } 5671 5672 #undef __FUNCT__ 5673 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5674 /*@ 5675 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5676 and "off-diagonal" part of the matrix in CSR format. 5677 5678 Collective on MPI_Comm 5679 5680 Input Parameters: 5681 + comm - MPI communicator 5682 . m - number of local rows (Cannot be PETSC_DECIDE) 5683 . n - This value should be the same as the local size used in creating the 5684 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5685 calculated if N is given) For square matrices n is almost always m. 5686 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5687 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5688 . i - row indices for "diagonal" portion of matrix 5689 . j - column indices 5690 . a - matrix values 5691 . oi - row indices for "off-diagonal" portion of matrix 5692 . oj - column indices 5693 - oa - matrix values 5694 5695 Output Parameter: 5696 . mat - the matrix 5697 5698 Level: advanced 5699 5700 Notes: 5701 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5702 must free the arrays once the matrix has been destroyed and not before. 5703 5704 The i and j indices are 0 based 5705 5706 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5707 5708 This sets local rows and cannot be used to set off-processor values. 5709 5710 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5711 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5712 not easily support in-place reassembly. 
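   As a minimal sketch of the layout this routine expects (illustrative values only; comm, A, and ierr are assumed to be declared as usual), a rank owning two rows and two columns whose coupling is entirely local could pass

.vb
      PetscInt    i[]  = {0,1,2},j[]  = {0,1};
      PetscScalar a[]  = {1.0,1.0};
      PetscInt    oi[] = {0,0,0},oj[] = {0};
      PetscScalar oa[] = {0.0};
      ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve

   where the diagonal block is a 2x2 identity in CSR form and the off-diagonal block is empty, so oj and oa are unused placeholders; since the arrays are not copied they must remain valid until the matrix is destroyed.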
It is recommended to use MatSetValues() (or a variant thereof) because 5713 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5714 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5715 communication if it is known that only local entries will be set. 5716 5717 .keywords: matrix, aij, compressed row, sparse, parallel 5718 5719 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5720 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5721 @*/ 5722 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5723 { 5724 PetscErrorCode ierr; 5725 Mat_MPIAIJ *maij; 5726 5727 PetscFunctionBegin; 5728 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5729 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5730 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5731 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5732 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5733 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5734 maij = (Mat_MPIAIJ*) (*mat)->data; 5735 5736 (*mat)->preallocated = PETSC_TRUE; 5737 5738 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5739 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5740 5741 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5742 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5743 5744 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5745 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5746 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5747 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5748 5749 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5750 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5751 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5752 PetscFunctionReturn(0); 5753 } 5754 5755 /* 5756 Special version for direct calls from Fortran 5757 */ 5758 #include <petsc-private/fortranimpl.h> 5759 5760 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5761 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5762 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5763 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5764 #endif 5765 5766 /* Change these macros so can be used in void function */ 5767 #undef CHKERRQ 5768 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5769 #undef SETERRQ2 5770 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5771 #undef SETERRQ3 5772 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5773 #undef SETERRQ 5774 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5775 5776 #undef __FUNCT__ 5777 #define __FUNCT__ "matsetvaluesmpiaij_" 5778 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5779 { 5780 Mat mat = *mmat; 5781 PetscInt m = *mm, n = *mn; 5782 InsertMode addv = *maddv; 5783 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5784 PetscScalar value; 5785 PetscErrorCode ierr; 
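  /* This mirrors MatSetValues_MPIAIJ(); the error-handling macros were redefined above to abort rather than return an error code because this Fortran-callable routine returns void */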
  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat        A     = aij->A;
    Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa   = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B     = aij->B;
    Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba   = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a;   /* refresh ba first: MatDisAssemble_MPIAIJ() replaced b->a, so ap2 must not use the stale pointer */
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
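          /* column-oriented input: the values for row im[i] are strided through v with stride m */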
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}

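/*
   Usage sketch (illustration only, not part of the library build): the Notes for
   MatCreateMPIAIJWithSplitArrays() above recommend assembling with MatSetValues() instead of
   building the split CSR arrays by hand. The fragment below shows that recommended path for a
   small 1D Laplacian; the global size (100), stencil values, and preallocation counts are
   invented for the example, and a parallel (MPIAIJ) matrix is assumed.

     Mat            A;
     PetscErrorCode ierr;
     PetscInt       r,rstart,rend;
     PetscScalar    stencil[3] = {-1.0,2.0,-1.0};

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,3,NULL,1,NULL);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (r=rstart; r<rend; r++) {
       if (r == 0) {
         PetscInt    c[2] = {0,1};
         PetscScalar v[2] = {2.0,-1.0};
         ierr = MatSetValues(A,1,&r,2,c,v,INSERT_VALUES);CHKERRQ(ierr);
       } else if (r == 99) {
         PetscInt    c[2] = {98,99};
         PetscScalar v[2] = {-1.0,2.0};
         ierr = MatSetValues(A,1,&r,2,c,v,INSERT_VALUES);CHKERRQ(ierr);
       } else {
         PetscInt c[3] = {r-1,r,r+1};
         ierr = MatSetValues(A,1,&r,3,c,stencil,INSERT_VALUES);CHKERRQ(ierr);
       }
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Because every row set above is locally owned, one could additionally call
   MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) before the loop to skip the
   assembly-time stash communication mentioned in the Notes.
*/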