#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
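/*
   Illustrative sketch (not part of the original source): creating an AIJ matrix and calling
   both preallocation routines as the man pages above recommend, so the same code works with
   one or many processes. ExampleCreateAIJ and the nonzero estimates are placeholders; the
   preallocation call for the type that was not selected is a harmless no-op.
*/
#if 0
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt n,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(*A,5,NULL);CHKERRQ(ierr);    /* used on one process */
  ierr = MatMPIAIJSetPreallocation(*A,5,NULL,2,NULL);CHKERRQ(ierr); /* used on several */
  PetscFunctionReturn(0);
}
#endif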
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
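/*
   Illustrative sketch (not part of the original source): the routine above is normally reached
   through the public MatGetColumnNorms() interface. Each process accumulates contributions for
   all N global columns in a work array and the final MPI_Allreduce() combines them, so norms
   must have global length on every process. ExampleColumnNorms is a hypothetical caller.
*/
#if 0
static PetscErrorCode ExampleColumnNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       N;
  PetscReal      *norms;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
  ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);  /* global length, on every process */
  ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
  ierr = PetscFree(norms);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif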
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
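/*
   Illustrative sketch (not part of the original source): a typical call sequence for
   MatDistribute_MPIAIJ() above, where rank 0 holds a square SeqAIJ matrix gmat and every
   rank passes its desired local row count m. ExampleDistribute and the use of
   PetscSplitOwnership() to pick m are assumptions, not taken from this file.
*/
#if 0
static PetscErrorCode ExampleDistribute(MPI_Comm comm,Mat gmat,PetscInt M,Mat *dmat)
{
  PetscErrorCode ierr;
  PetscInt       m = PETSC_DECIDE;

  PetscFunctionBegin;
  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);               /* choose a local row count */
  ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,dmat);CHKERRQ(ierr);
  /* later, with the same nonzero pattern in gmat, only the values are moved */
  ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,dmat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif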
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
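/*
   Illustrative sketch (not part of the original source): how the colmap built above is
   consulted. Entries are stored shifted by one so that "not found" (0) is distinguishable
   from local column 0; this mirrors the lookups done in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below. ExampleGlobalToLocalColumn is a hypothetical helper.
*/
#if 0
static PetscErrorCode ExampleGlobalToLocalColumn(Mat_MPIAIJ *aij,PetscInt gcol,PetscInt *lcol)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;               /* -1 now means: column not present in the off-diagonal part */
#else
  *lcol = aij->colmap[gcol] - 1;
#endif
  PetscFunctionReturn(0);
}
#endif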
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0; \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value; \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
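/*
   Illustrative sketch (not part of the original source): the search strategy used by the two
   macros above, written out as a plain function. The window [low,high) is narrowed by bisection
   until it holds at most 5 entries, then scanned linearly; on a miss the tail of the sorted row
   is shifted up one slot to make room. ExampleInsertSorted is hypothetical and assumes the
   caller has space for one more entry.
*/
#if 0
static PetscInt ExampleInsertSorted(PetscInt *cols,PetscScalar *vals,PetscInt nrow,PetscInt col,PetscScalar value)
{
  PetscInt low = 0,high = nrow,t,i,ii;

  while (high-low > 5) {              /* bisection down to a small window */
    t = (low+high)/2;
    if (cols[t] > col) high = t;
    else               low  = t;
  }
  for (i=low; i<high; i++) {          /* linear scan inside the window */
    if (cols[i] > col) break;
    if (cols[i] == col) {vals[i] += value; return nrow;}  /* existing entry: ADD_VALUES */
  }
  for (ii=nrow-1; ii>=i; ii--) {      /* shift up all the later entries in this row */
    cols[ii+1] = cols[ii];
    vals[ii+1] = vals[ii];
  }
  cols[i] = col;
  vals[i] = value;
  return nrow+1;
}
#endif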
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
        } else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
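/*
   Illustrative sketch (not part of the original source): MatGetValues_MPIAIJ() above only
   supports rows owned by the calling process, so retrieval is normally bracketed by
   MatGetOwnershipRange(). ExampleGetOwnedEntry is a hypothetical caller.
*/
#if 0
static PetscErrorCode ExampleGetOwnedEntry(Mat A,PetscInt i,PetscInt j,PetscScalar *v)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  if (i < rstart || i >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Row not owned by this process");
  ierr = MatGetValues(A,1,&i,1,&j,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif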
"MatAssemblyBegin_MPIAIJ" 637 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 638 { 639 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 640 PetscErrorCode ierr; 641 PetscInt nstash,reallocs; 642 InsertMode addv; 643 644 PetscFunctionBegin; 645 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 646 647 /* make sure all processors are either in INSERTMODE or ADDMODE */ 648 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 649 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 650 mat->insertmode = addv; /* in case this processor had no cache */ 651 652 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 653 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 654 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 655 PetscFunctionReturn(0); 656 } 657 658 #undef __FUNCT__ 659 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 660 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 661 { 662 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 663 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 664 PetscErrorCode ierr; 665 PetscMPIInt n; 666 PetscInt i,j,rstart,ncols,flg; 667 PetscInt *row,*col; 668 PetscBool other_disassembled; 669 PetscScalar *val; 670 InsertMode addv = mat->insertmode; 671 672 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 673 674 PetscFunctionBegin; 675 if (!aij->donotstash && !mat->nooffprocentries) { 676 while (1) { 677 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 678 if (!flg) break; 679 680 for (i=0; i<n; ) { 681 /* Now identify the consecutive vals belonging to the same row */ 682 for (j=i,rstart=row[j]; j<n; j++) { 683 if (row[j] != rstart) break; 684 } 685 if (j < n) ncols = j-i; 686 else ncols = n-i; 687 /* Now assemble all these values with a single function call */ 688 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 689 690 i = j; 691 } 692 } 693 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 694 } 695 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 696 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 697 698 /* determine if any processor has disassembled, if so we must 699 also disassemble ourselfs, in order that we may reassemble. 
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
process zero row"); 796 nrecvs = nsends; 797 nmax = N; 798 } else { 799 /* inform other processors of number of messages and max length*/ 800 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 801 } 802 803 /* post receives: */ 804 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 805 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 806 for (i=0; i<nrecvs; i++) { 807 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 808 } 809 810 /* do sends: 811 1) starts[i] gives the starting index in svalues for stuff going to 812 the ith processor 813 */ 814 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 815 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 816 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 817 818 starts[0] = 0; 819 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 820 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 821 822 starts[0] = 0; 823 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 824 count = 0; 825 for (i=0; i<size; i++) { 826 if (nprocs[2*i+1]) { 827 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 828 } 829 } 830 ierr = PetscFree(starts);CHKERRQ(ierr); 831 832 base = owners[rank]; 833 834 /* wait on receives */ 835 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 836 count = nrecvs; slen = 0; 837 while (count) { 838 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 839 /* unpack receives into our local space */ 840 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 841 842 source[imdex] = recv_status.MPI_SOURCE; 843 lens[imdex] = n; 844 slen += n; 845 count--; 846 } 847 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 848 849 /* move the data into the send scatter */ 850 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 851 count = 0; 852 for (i=0; i<nrecvs; i++) { 853 values = rvalues + i*nmax; 854 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 855 } 856 ierr = PetscFree(rvalues);CHKERRQ(ierr); 857 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 858 ierr = PetscFree(owner);CHKERRQ(ierr); 859 ierr = PetscFree(nprocs);CHKERRQ(ierr); 860 861 /* fix right hand side if needed */ 862 if (x && b) { 863 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 864 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 865 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 866 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 867 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 868 } 869 /* 870 Zero the required rows. If the "diagonal block" of the matrix 871 is square and the user wishes to set the diagonal we use separate 872 code so that MatSetValues() is not called for each diagonal allocating 873 new memory, thus calling lots of mallocs and slowing things down. 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
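/*
   Illustrative sketch (not part of the original source): MatZeroRowsColumns() (implemented
   above) differs from MatZeroRows() in that the corresponding columns are eliminated as well,
   which requires scattering a mask to locate ghost columns; when b is given, the eliminated
   off-diagonal entries are folded into the right-hand side (bb[i] -= a_ij * x_j), preserving
   symmetry. ExampleZeroRowsColumns is a hypothetical caller.
*/
#if 0
static PetscErrorCode ExampleZeroRowsColumns(Mat A,PetscInt nrows,const PetscInt rows[],Vec x,Vec b)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroRowsColumns(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif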
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
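/*
   Note (not part of the original source): MatMult_MPIAIJ() above realizes the standard MPIAIJ
   splitting

       y_local = A_d * x_local + A_o * x_ghost

   where A_d (a->A) holds the "diagonal" block of locally owned columns and A_o (a->B) the
   off-process columns. The scatter filling the ghost values a->lvec is started before and
   finished after the local product, overlapping communication with computation; the same
   pattern recurs in MatMultAdd and the transpose routines below.
*/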
#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
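/*
   Illustrative sketch (not part of the original source): checking B == A^T through the public
   MatIsTranspose() interface implemented above; tol is the tolerance used when comparing
   corresponding entries. ExampleCheckTranspose is a hypothetical caller.
*/
#if 0
static PetscErrorCode ExampleCheckTranspose(Mat A,Mat B)
{
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = MatIsTranspose(A,B,0.0,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"B is not the transpose of A");
  PetscFunctionReturn(0);
}
#endif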
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock
  A->A is the diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
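/*
   Illustrative sketch (not part of the original source): extracting the diagonal with a vector
   whose layout matches the matrix rows, as the layout checks above require. ExampleGetDiagonal
   is a hypothetical caller.
*/
#if 0
static PetscErrorCode ExampleGetDiagonal(Mat A,Vec *d)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,NULL,d);CHKERRQ(ierr);  /* d gets the row layout of A */
  ierr = MatGetDiagonal(A,*d);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif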
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* this processor needs space as large as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  PetscFunctionReturn(0);
}
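/*
   Illustrative sketch (not part of the original source): the binary writer above is reached via
   MatView() on a binary viewer; the file can then be read back with MatLoad(). The file name
   "matrix.dat" and ExampleSaveLoad are placeholders.
*/
#if 0
static PetscErrorCode ExampleSaveLoad(Mat A,Mat *B)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

  ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
  ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr);
  ierr = MatLoad(*B,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif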
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }
PetscViewer sviewer; 1452 PetscViewerFormat format; 1453 1454 PetscFunctionBegin; 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1457 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1458 if (iascii) { 1459 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1460 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1461 MatInfo info; 1462 PetscBool inodes; 1463 1464 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1465 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1466 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1468 if (!inodes) { 1469 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1470 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1471 } else { 1472 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1473 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1474 } 1475 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1476 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1477 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1478 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1479 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1481 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1482 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1483 PetscFunctionReturn(0); 1484 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1485 PetscInt inodecount,inodelimit,*inodes; 1486 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1487 if (inodes) { 1488 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1489 } else { 1490 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1491 } 1492 PetscFunctionReturn(0); 1493 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1494 PetscFunctionReturn(0); 1495 } 1496 } else if (isbinary) { 1497 if (size == 1) { 1498 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1499 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1500 } else { 1501 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1502 } 1503 PetscFunctionReturn(0); 1504 } else if (isdraw) { 1505 PetscDraw draw; 1506 PetscBool isnull; 1507 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1508 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1509 } 1510 1511 if (size == 1) { 1512 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1513 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1514 } else { 1515 /* assemble the entire matrix onto first processor. 
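Rather than streaming rows to the root, a temporary parallel matrix A is built
whose rows all live on process 0: each process copies its diagonal block and its
off-diagonal block into A with MatSetValues(), translating the compressed
off-diagonal column indices back to global numbering through garray. After
assembly, process 0 owns the whole matrix and views its sequential part through
the singleton viewer.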
*/ 1516 Mat A; 1517 Mat_SeqAIJ *Aloc; 1518 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1519 MatScalar *a; 1520 1521 if (mat->rmap->N > 1024) { 1522 PetscBool flg = PETSC_FALSE; 1523 1524 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1525 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1526 } 1527 1528 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1529 if (!rank) { 1530 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1531 } else { 1532 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1533 } 1534 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1535 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1536 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1537 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1538 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1539 1540 /* copy over the A part */ 1541 Aloc = (Mat_SeqAIJ*)aij->A->data; 1542 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1543 row = mat->rmap->rstart; 1544 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1545 for (i=0; i<m; i++) { 1546 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1547 row++; 1548 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1549 } 1550 aj = Aloc->j; 1551 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1552 1553 /* copy over the B part */ 1554 Aloc = (Mat_SeqAIJ*)aij->B->data; 1555 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1556 row = mat->rmap->rstart; 1557 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1558 ct = cols; 1559 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1560 for (i=0; i<m; i++) { 1561 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1562 row++; 1563 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1564 } 1565 ierr = PetscFree(ct);CHKERRQ(ierr); 1566 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1568 /* 1569 Everyone has to call to draw the matrix since the graphics waits are 1570 synchronized across all processors that share the PetscDraw object 1571 */ 1572 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1573 if (!rank) { 1574 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1575 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1576 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1577 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1578 } 1579 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1580 ierr = MatDestroy(&A);CHKERRQ(ierr); 1581 } 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatView_MPIAIJ" 1587 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1588 { 1589 PetscErrorCode ierr; 1590 PetscBool iascii,isdraw,issocket,isbinary; 1591 1592 PetscFunctionBegin; 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1594 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1596 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1597 if (iascii || isdraw || isbinary || issocket) { 1598 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1599 } 1600 PetscFunctionReturn(0); 1601 } 1602 1603 #undef __FUNCT__ 1604 #define __FUNCT__ "MatSOR_MPIAIJ" 1605 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1606 { 1607 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1608 PetscErrorCode ierr; 1609 Vec bb1 = 0; 1610 PetscBool hasop; 1611 1612 PetscFunctionBegin; 1613 if (flag == SOR_APPLY_UPPER) { 1614 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1615 PetscFunctionReturn(0); 1616 } 1617 1618 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1619 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1620 } 1621 1622 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1623 if (flag & SOR_ZERO_INITIAL_GUESS) { 1624 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1625 its--; 1626 } 1627 1628 while (its--) { 1629 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1631 1632 /* update rhs: bb1 = bb - B*x */ 1633 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1634 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1635 1636 /* local sweep */ 1637 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1638 } 1639 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1640 if (flag & SOR_ZERO_INITIAL_GUESS) { 1641 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1642 its--; 1643 } 1644 while (its--) { 1645 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 1648 /* update rhs: bb1 = bb - B*x */ 1649 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1650 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1651 1652 /* local sweep */ 1653 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1654 } 1655 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1656 if (flag & SOR_ZERO_INITIAL_GUESS) { 1657 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1658 its--; 1659 } 1660 while (its--) { 1661 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 1664 /* update rhs: bb1 = bb - B*x */ 1665 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1666 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1667 1668 /* local sweep */ 1669 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1670 } 1671 } else if (flag & SOR_EISENSTAT) { 1672 Vec xx1; 1673 1674 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1675 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1676 
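/*
 A sketch of the algebra behind Eisenstat's trick as used here: the backward
 sweep above produced xx from bb using only the local diagonal block. Below,
 xx is scattered into lvec, its diagonal contribution D*xx is rescaled by
 (omega-2)/omega and recombined with bb, and the off-process coupling B*lvec
 is added, so that the closing forward sweep applies the remaining half of the
 symmetric sweep without a second multiply by the full matrix.
*/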
1677 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1679 if (!mat->diag) { 1680 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1681 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1682 } 1683 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1684 if (hasop) { 1685 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1686 } else { 1687 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1688 } 1689 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1690 1691 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1692 1693 /* local sweep */ 1694 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1695 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1696 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1697 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1698 1699 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1700 PetscFunctionReturn(0); 1701 } 1702 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatPermute_MPIAIJ" 1705 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1706 { 1707 Mat aA,aB,Aperm; 1708 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1709 PetscScalar *aa,*ba; 1710 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1711 PetscSF rowsf,sf; 1712 IS parcolp = NULL; 1713 PetscBool done; 1714 PetscErrorCode ierr; 1715 1716 PetscFunctionBegin; 1717 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1718 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1719 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1720 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1721 1722 /* Invert row permutation to find out where my rows should go */ 1723 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1724 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1725 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1726 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1727 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1728 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1729 1730 /* Invert column permutation to find out where my columns should go */ 1731 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1732 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1733 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1734 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1735 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1737 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1738 1739 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1740 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1741 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1742 1743 /* Find out where my gcols should go */ 1744 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1745 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1746 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1747 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1748 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1749 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1751 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1752 1753 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1754 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1757 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1758 for (i=0; i<m; i++) { 1759 PetscInt row = rdest[i],rowner; 1760 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1761 for (j=ai[i]; j<ai[i+1]; j++) { 1762 PetscInt cowner,col = cdest[aj[j]]; 1763 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1764 if (rowner == cowner) dnnz[i]++; 1765 else onnz[i]++; 1766 } 1767 for (j=bi[i]; j<bi[i+1]; j++) { 1768 PetscInt cowner,col = gcdest[bj[j]]; 1769 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1770 if (rowner == cowner) dnnz[i]++; 1771 else onnz[i]++; 1772 } 1773 } 1774 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1776 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1780 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1781 1782 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1784 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1785 for (i=0; i<m; i++) { 1786 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1787 PetscInt rowlen; 1788 rowlen = ai[i+1] - ai[i]; 1789 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1790 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1791 rowlen = bi[i+1] - bi[i]; 1792 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1793 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1794 } 1795 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1798 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1800 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1801 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1802 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1803 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1804 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1805 *B = Aperm; 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1811 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1812 { 1813 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1814 Mat A = mat->A,B = mat->B; 1815 PetscErrorCode ierr; 1816 PetscReal isend[5],irecv[5]; 1817 1818 PetscFunctionBegin; 1819 
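/*
 Counts from the diagonal block A and the off-diagonal block B are first summed
 into isend[]; depending on the flag they are then returned as the local result
 (MAT_LOCAL) or reduced across the communicator with a max or a sum
 (MAT_GLOBAL_MAX / MAT_GLOBAL_SUM) before being copied back into info.
*/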
info->block_size = 1.0; 1820 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1821 1822 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1823 isend[3] = info->memory; isend[4] = info->mallocs; 1824 1825 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1826 1827 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1828 isend[3] += info->memory; isend[4] += info->mallocs; 1829 if (flag == MAT_LOCAL) { 1830 info->nz_used = isend[0]; 1831 info->nz_allocated = isend[1]; 1832 info->nz_unneeded = isend[2]; 1833 info->memory = isend[3]; 1834 info->mallocs = isend[4]; 1835 } else if (flag == MAT_GLOBAL_MAX) { 1836 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1837 1838 info->nz_used = irecv[0]; 1839 info->nz_allocated = irecv[1]; 1840 info->nz_unneeded = irecv[2]; 1841 info->memory = irecv[3]; 1842 info->mallocs = irecv[4]; 1843 } else if (flag == MAT_GLOBAL_SUM) { 1844 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1845 1846 info->nz_used = irecv[0]; 1847 info->nz_allocated = irecv[1]; 1848 info->nz_unneeded = irecv[2]; 1849 info->memory = irecv[3]; 1850 info->mallocs = irecv[4]; 1851 } 1852 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1853 info->fill_ratio_needed = 0; 1854 info->factor_mallocs = 0; 1855 PetscFunctionReturn(0); 1856 } 1857 1858 #undef __FUNCT__ 1859 #define __FUNCT__ "MatSetOption_MPIAIJ" 1860 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1861 { 1862 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1863 PetscErrorCode ierr; 1864 1865 PetscFunctionBegin; 1866 switch (op) { 1867 case MAT_NEW_NONZERO_LOCATIONS: 1868 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1869 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1870 case MAT_KEEP_NONZERO_PATTERN: 1871 case MAT_NEW_NONZERO_LOCATION_ERR: 1872 case MAT_USE_INODES: 1873 case MAT_IGNORE_ZERO_ENTRIES: 1874 MatCheckPreallocated(A,1); 1875 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1876 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1877 break; 1878 case MAT_ROW_ORIENTED: 1879 a->roworiented = flg; 1880 1881 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1882 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1883 break; 1884 case MAT_NEW_DIAGONALS: 1885 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1886 break; 1887 case MAT_IGNORE_OFF_PROC_ENTRIES: 1888 a->donotstash = flg; 1889 break; 1890 case MAT_SPD: 1891 A->spd_set = PETSC_TRUE; 1892 A->spd = flg; 1893 if (flg) { 1894 A->symmetric = PETSC_TRUE; 1895 A->structurally_symmetric = PETSC_TRUE; 1896 A->symmetric_set = PETSC_TRUE; 1897 A->structurally_symmetric_set = PETSC_TRUE; 1898 } 1899 break; 1900 case MAT_SYMMETRIC: 1901 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1902 break; 1903 case MAT_STRUCTURALLY_SYMMETRIC: 1904 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1905 break; 1906 case MAT_HERMITIAN: 1907 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1908 break; 1909 case MAT_SYMMETRY_ETERNAL: 1910 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1911 break; 1912 default: 1913 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1914 } 1915 PetscFunctionReturn(0); 1916 } 1917 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatGetRow_MPIAIJ" 1920 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1921 { 1922 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1923 PetscScalar 
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1924 PetscErrorCode ierr; 1925 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1926 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1927 PetscInt *cmap,*idx_p; 1928 1929 PetscFunctionBegin; 1930 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1931 mat->getrowactive = PETSC_TRUE; 1932 1933 if (!mat->rowvalues && (idx || v)) { 1934 /* 1935 allocate enough space to hold information from the longest row. 1936 */ 1937 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1938 PetscInt max = 1,tmp; 1939 for (i=0; i<matin->rmap->n; i++) { 1940 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1941 if (max < tmp) max = tmp; 1942 } 1943 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1944 } 1945 1946 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1947 lrow = row - rstart; 1948 1949 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1950 if (!v) {pvA = 0; pvB = 0;} 1951 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1952 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1953 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1954 nztot = nzA + nzB; 1955 1956 cmap = mat->garray; 1957 if (v || idx) { 1958 if (nztot) { 1959 /* Sort by increasing column numbers, assuming A and B already sorted */ 1960 PetscInt imark = -1; 1961 if (v) { 1962 *v = v_p = mat->rowvalues; 1963 for (i=0; i<nzB; i++) { 1964 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1965 else break; 1966 } 1967 imark = i; 1968 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1969 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1970 } 1971 if (idx) { 1972 *idx = idx_p = mat->rowindices; 1973 if (imark > -1) { 1974 for (i=0; i<imark; i++) { 1975 idx_p[i] = cmap[cworkB[i]]; 1976 } 1977 } else { 1978 for (i=0; i<nzB; i++) { 1979 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1980 else break; 1981 } 1982 imark = i; 1983 } 1984 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1985 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1986 } 1987 } else { 1988 if (idx) *idx = 0; 1989 if (v) *v = 0; 1990 } 1991 } 1992 *nz = nztot; 1993 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1994 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 2000 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2001 { 2002 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2003 2004 PetscFunctionBegin; 2005 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2006 aij->getrowactive = PETSC_FALSE; 2007 PetscFunctionReturn(0); 2008 } 2009 2010 #undef __FUNCT__ 2011 #define __FUNCT__ "MatNorm_MPIAIJ" 2012 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2013 { 2014 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2015 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2016 PetscErrorCode ierr; 2017 PetscInt i,j,cstart = mat->cmap->rstart; 2018 PetscReal sum = 0.0; 2019 MatScalar *v; 2020 2021 PetscFunctionBegin; 2022 if (aij->size == 1) { 2023 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2024 } else { 2025 if (type == NORM_FROBENIUS) { 2026 v = 
amat->a; 2027 for (i=0; i<amat->nz; i++) { 2028 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2029 } 2030 v = bmat->a; 2031 for (i=0; i<bmat->nz; i++) { 2032 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2033 } 2034 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2035 *norm = PetscSqrtReal(*norm); 2036 } else if (type == NORM_1) { /* max column norm */ 2037 PetscReal *tmp,*tmp2; 2038 PetscInt *jj,*garray = aij->garray; 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2040 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2041 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2042 *norm = 0.0; 2043 v = amat->a; jj = amat->j; 2044 for (j=0; j<amat->nz; j++) { 2045 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2046 } 2047 v = bmat->a; jj = bmat->j; 2048 for (j=0; j<bmat->nz; j++) { 2049 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2050 } 2051 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2052 for (j=0; j<mat->cmap->N; j++) { 2053 if (tmp2[j] > *norm) *norm = tmp2[j]; 2054 } 2055 ierr = PetscFree(tmp);CHKERRQ(ierr); 2056 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2057 } else if (type == NORM_INFINITY) { /* max row norm */ 2058 PetscReal ntemp = 0.0; 2059 for (j=0; j<aij->A->rmap->n; j++) { 2060 v = amat->a + amat->i[j]; 2061 sum = 0.0; 2062 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2063 sum += PetscAbsScalar(*v); v++; 2064 } 2065 v = bmat->a + bmat->i[j]; 2066 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2067 sum += PetscAbsScalar(*v); v++; 2068 } 2069 if (sum > ntemp) ntemp = sum; 2070 } 2071 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2072 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2073 } 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatTranspose_MPIAIJ" 2079 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2080 { 2081 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2082 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2083 PetscErrorCode ierr; 2084 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2085 PetscInt cstart = A->cmap->rstart,ncol; 2086 Mat B; 2087 MatScalar *array; 2088 2089 PetscFunctionBegin; 2090 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2091 2092 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2093 ai = Aloc->i; aj = Aloc->j; 2094 bi = Bloc->i; bj = Bloc->j; 2095 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2096 PetscInt *d_nnz,*g_nnz,*o_nnz; 2097 PetscSFNode *oloc; 2098 PETSC_UNUSED PetscSF sf; 2099 2100 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2101 /* compute d_nnz for preallocation */ 2102 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2103 for (i=0; i<ai[ma]; i++) { 2104 d_nnz[aj[i]]++; 2105 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2106 } 2107 /* compute local off-diagonal contributions */ 2108 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2109 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2110 /* map those to global */ 2111 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2112 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2113 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2114 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2115 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2117 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2118 2119 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2120 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2121 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2122 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2123 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2124 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2125 } else { 2126 B = *matout; 2127 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2128 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2129 } 2130 2131 /* copy over the A part */ 2132 array = Aloc->a; 2133 row = A->rmap->rstart; 2134 for (i=0; i<ma; i++) { 2135 ncol = ai[i+1]-ai[i]; 2136 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2137 row++; 2138 array += ncol; aj += ncol; 2139 } 2140 aj = Aloc->j; 2141 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2142 2143 /* copy over the B part */ 2144 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2145 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2146 array = Bloc->a; 2147 row = A->rmap->rstart; 2148 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2149 cols_tmp = cols; 2150 for (i=0; i<mb; i++) { 2151 ncol = bi[i+1]-bi[i]; 2152 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2153 row++; 2154 array += ncol; cols_tmp += ncol; 2155 } 2156 ierr = PetscFree(cols);CHKERRQ(ierr); 2157 2158 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2160 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2161 *matout = B; 2162 } else { 2163 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2164 } 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2170 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2171 { 2172 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2173 Mat a = aij->A,b = aij->B; 2174 PetscErrorCode ierr; 2175 PetscInt s1,s2,s3; 2176 2177 PetscFunctionBegin; 2178 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2179 if (rr) { 2180 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2181 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2182 /* Overlap communication with computation. 
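The forward scatter of rr into aij->lvec is only started here; the left scaling
of the off-diagonal block and the scaling of the diagonal block run below while
the messages are in flight, and VecScatterEnd() is not called until the ghosted
values are actually needed to right-scale aij->B.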
*/ 2183 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2184 } 2185 if (ll) { 2186 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2187 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2188 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2189 } 2190 /* scale the diagonal block */ 2191 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2192 2193 if (rr) { 2194 /* Do a scatter end and then right scale the off-diagonal block */ 2195 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2196 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2197 } 2198 PetscFunctionReturn(0); 2199 } 2200 2201 #undef __FUNCT__ 2202 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2203 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2204 { 2205 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2206 PetscErrorCode ierr; 2207 2208 PetscFunctionBegin; 2209 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2210 PetscFunctionReturn(0); 2211 } 2212 2213 #undef __FUNCT__ 2214 #define __FUNCT__ "MatEqual_MPIAIJ" 2215 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2216 { 2217 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2218 Mat a,b,c,d; 2219 PetscBool flg; 2220 PetscErrorCode ierr; 2221 2222 PetscFunctionBegin; 2223 a = matA->A; b = matA->B; 2224 c = matB->A; d = matB->B; 2225 2226 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2227 if (flg) { 2228 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2229 } 2230 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2231 PetscFunctionReturn(0); 2232 } 2233 2234 #undef __FUNCT__ 2235 #define __FUNCT__ "MatCopy_MPIAIJ" 2236 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2237 { 2238 PetscErrorCode ierr; 2239 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2240 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2241 2242 PetscFunctionBegin; 2243 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2244 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2245 /* because of the column compression in the off-processor part of the matrix a->B, 2246 the number of columns in a->B and b->B may be different, hence we cannot call 2247 the MatCopy() directly on the two parts. 
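(The off-diagonal block keeps only the nonempty global columns, compressed and
renumbered through garray, so two matrices with the same global nonzero pattern
may still carry differently shaped B blocks.)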
If need be, we can provide a more 2248 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2249 then copying the submatrices */ 2250 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2251 } else { 2252 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2253 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2254 } 2255 PetscFunctionReturn(0); 2256 } 2257 2258 #undef __FUNCT__ 2259 #define __FUNCT__ "MatSetUp_MPIAIJ" 2260 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2261 { 2262 PetscErrorCode ierr; 2263 2264 PetscFunctionBegin; 2265 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2266 PetscFunctionReturn(0); 2267 } 2268 2269 #undef __FUNCT__ 2270 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2271 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2272 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2273 { 2274 PetscInt i,m=Y->rmap->N; 2275 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2276 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2277 const PetscInt *xi = x->i,*yi = y->i; 2278 2279 PetscFunctionBegin; 2280 /* Set the number of nonzeros in the new matrix */ 2281 for (i=0; i<m; i++) { 2282 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2283 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2284 nnz[i] = 0; 2285 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2286 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2287 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2288 nnz[i]++; 2289 } 2290 for (; k<nzy; k++) nnz[i]++; 2291 } 2292 PetscFunctionReturn(0); 2293 } 2294 2295 #undef __FUNCT__ 2296 #define __FUNCT__ "MatAXPY_MPIAIJ" 2297 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2298 { 2299 PetscErrorCode ierr; 2300 PetscInt i; 2301 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2302 PetscBLASInt bnz,one=1; 2303 Mat_SeqAIJ *x,*y; 2304 2305 PetscFunctionBegin; 2306 if (str == SAME_NONZERO_PATTERN) { 2307 PetscScalar alpha = a; 2308 x = (Mat_SeqAIJ*)xx->A->data; 2309 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2310 y = (Mat_SeqAIJ*)yy->A->data; 2311 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2312 x = (Mat_SeqAIJ*)xx->B->data; 2313 y = (Mat_SeqAIJ*)yy->B->data; 2314 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2315 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2316 } else if (str == SUBSET_NONZERO_PATTERN) { 2317 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2318 2319 x = (Mat_SeqAIJ*)xx->B->data; 2320 y = (Mat_SeqAIJ*)yy->B->data; 2321 if (y->xtoy && y->XtoY != xx->B) { 2322 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2323 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2324 } 2325 if (!y->xtoy) { /* get xtoy */ 2326 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2327 y->XtoY = xx->B; 2328 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2329 } 2330 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2331 } else { 2332 Mat B; 2333 PetscInt *nnz_d,*nnz_o; 2334 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2335 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2336 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2337 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2338 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2339 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2340 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2341 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2342 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2343 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2344 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2345 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2346 ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2347 ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2348 }
2349 PetscFunctionReturn(0);
2350 }
2351
2352 extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2353
2354 #undef __FUNCT__
2355 #define __FUNCT__ "MatConjugate_MPIAIJ"
2356 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2357 {
2358 #if defined(PETSC_USE_COMPLEX)
2359 PetscErrorCode ierr;
2360 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2361
2362 PetscFunctionBegin;
2363 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2364 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2365 #else
2366 PetscFunctionBegin;
2367 #endif
2368 PetscFunctionReturn(0);
2369 }
2370
2371 #undef __FUNCT__
2372 #define __FUNCT__ "MatRealPart_MPIAIJ"
2373 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2374 {
2375 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2376 PetscErrorCode ierr;
2377
2378 PetscFunctionBegin;
2379 ierr = MatRealPart(a->A);CHKERRQ(ierr);
2380 ierr = MatRealPart(a->B);CHKERRQ(ierr);
2381 PetscFunctionReturn(0);
2382 }
2383
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2386 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2387 {
2388 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2389 PetscErrorCode ierr;
2390
2391 PetscFunctionBegin;
2392 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2393 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2394 PetscFunctionReturn(0);
2395 }
2396
2397 #if defined(PETSC_HAVE_PBGL)
2398
2399 #include <boost/parallel/mpi/bsp_process_group.hpp>
2400 #include <boost/graph/distributed/ilu_default_graph.hpp>
2401 #include <boost/graph/distributed/ilu_0_block.hpp>
2402 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2403 #include <boost/graph/distributed/petsc/interface.hpp>
2404 #include <boost/multi_array.hpp>
2405 #include <boost/parallel/distributed_property_map.hpp>
2406
2407 #undef __FUNCT__
2408 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2409 /*
2410 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2411 */
2412 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2413 {
2414 namespace petsc = boost::distributed::petsc;
2415
2416 namespace graph_dist = boost::graph::distributed;
2417 using boost::graph::distributed::ilu_default::process_group_type;
2418 using boost::graph::ilu_permuted;
2419
2420 PetscBool row_identity, col_identity;
2421 PetscContainer c;
2422 PetscInt m, n, M, N;
2423 PetscErrorCode ierr;
2424
2425 PetscFunctionBegin;
2426 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2427 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2428 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2429 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2430
2431 process_group_type pg;
2432 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2433 lgraph_type
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2434 lgraph_type& level_graph = *lgraph_p; 2435 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2436 2437 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2438 ilu_permuted(level_graph); 2439 2440 /* put together the new matrix */ 2441 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2442 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2443 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2444 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2445 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2446 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2447 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2449 2450 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2451 ierr = PetscContainerSetPointer(c, lgraph_p); 2452 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2453 ierr = PetscContainerDestroy(&c); 2454 PetscFunctionReturn(0); 2455 } 2456 2457 #undef __FUNCT__ 2458 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2459 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2460 { 2461 PetscFunctionBegin; 2462 PetscFunctionReturn(0); 2463 } 2464 2465 #undef __FUNCT__ 2466 #define __FUNCT__ "MatSolve_MPIAIJ" 2467 /* 2468 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2469 */ 2470 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2471 { 2472 namespace graph_dist = boost::graph::distributed; 2473 2474 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2475 lgraph_type *lgraph_p; 2476 PetscContainer c; 2477 PetscErrorCode ierr; 2478 2479 PetscFunctionBegin; 2480 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2481 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2482 ierr = VecCopy(b, x);CHKERRQ(ierr); 2483 2484 PetscScalar *array_x; 2485 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2486 PetscInt sx; 2487 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2488 2489 PetscScalar *array_b; 2490 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2491 PetscInt sb; 2492 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2493 2494 lgraph_type& level_graph = *lgraph_p; 2495 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2496 2497 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2498 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2499 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2500 2501 typedef boost::iterator_property_map<array_ref_type::iterator, 2502 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2503 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2504 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2505 2506 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2507 PetscFunctionReturn(0); 2508 } 2509 #endif 2510 2511 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2512 PetscInt nzlocal,nsends,nrecvs; 2513 PetscMPIInt *send_rank,*recv_rank; 2514 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2515 PetscScalar *sbuf_a,**rbuf_a; 2516 PetscErrorCode (*Destroy)(Mat); 2517 } Mat_Redundant; 2518 2519 #undef __FUNCT__ 2520 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2521 
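/*
 The Mat_Redundant struct above caches the communication pattern and the
 send/receive buffers negotiated by MatGetRedundantMatrix_MPIAIJ(), so that a
 MAT_REUSE_MATRIX call can refill the redundant matrix without exchanging
 sizes again. It is attached to the created matrix through a PetscContainer
 and released by the two routines below. A minimal caller-side sketch, through
 the public MatGetRedundantMatrix() interface (variable names hypothetical):

     Mat C;
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... values of mat change, nonzero pattern stays fixed ...
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/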
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2522 {
2523 PetscErrorCode ierr;
2524 Mat_Redundant *redund=(Mat_Redundant*)ptr;
2525 PetscInt i;
2526
2527 PetscFunctionBegin;
2528 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2529 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2530 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2531 for (i=0; i<redund->nrecvs; i++) {
2532 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2533 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2534 }
2535 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2536 ierr = PetscFree(redund);CHKERRQ(ierr);
2537 PetscFunctionReturn(0);
2538 }
2539
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatDestroy_MatRedundant"
2542 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2543 {
2544 PetscErrorCode ierr;
2545 PetscContainer container;
2546 Mat_Redundant *redund=NULL;
2547
2548 PetscFunctionBegin;
2549 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2550 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2551 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2552
2553 A->ops->destroy = redund->Destroy;
2554
2555 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2556 if (A->ops->destroy) {
2557 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2558 }
2559 PetscFunctionReturn(0);
2560 }
2561
2562 #undef __FUNCT__
2563 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2564 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2565 {
2566 PetscMPIInt rank,size;
2567 MPI_Comm comm;
2568 PetscErrorCode ierr;
2569 PetscInt nsends = 0,nrecvs=0,i,rownz_max=0;
2570 PetscMPIInt *send_rank= NULL,*recv_rank=NULL;
2571 PetscInt *rowrange = mat->rmap->range;
2572 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2573 Mat A = aij->A,B=aij->B,C=*matredundant;
2574 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2575 PetscScalar *sbuf_a;
2576 PetscInt nzlocal=a->nz+b->nz;
2577 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2578 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2579 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2580 MatScalar *aworkA,*aworkB;
2581 PetscScalar *vals;
2582 PetscMPIInt tag1,tag2,tag3,imdex;
2583 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2584 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2585 MPI_Status recv_status,*send_status;
2586 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2587 PetscInt **rbuf_j=NULL;
2588 PetscScalar **rbuf_a=NULL;
2589 Mat_Redundant *redund =NULL;
2590 PetscContainer container;
2591
2592 PetscFunctionBegin;
2593 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2594 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2595 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2596
2597 if (reuse == MAT_REUSE_MATRIX) {
2598 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2599 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2600 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2601 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2602 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2603 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2604 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2605 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2606
2607 nsends = redund->nsends;
2608 nrecvs = redund->nrecvs;
2609 send_rank = redund->send_rank;
2610 recv_rank = redund->recv_rank;
2611 sbuf_nz = redund->sbuf_nz;
2612 rbuf_nz = redund->rbuf_nz;
2613 sbuf_j = redund->sbuf_j;
2614 sbuf_a = redund->sbuf_a;
2615 rbuf_j = redund->rbuf_j;
2616 rbuf_a = redund->rbuf_a;
2617 }
2618
2619 if (reuse == MAT_INITIAL_MATRIX) {
2620 PetscMPIInt subrank,subsize;
2621 PetscInt nleftover,np_subcomm;
2622 /* get the destination processors' id send_rank, nsends and nrecvs */
2623 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2624 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2625 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2626
2627 np_subcomm = size/nsubcomm;
2628 nleftover = size - nsubcomm*np_subcomm;
2629
2630 nsends = 0; nrecvs = 0;
2631 for (i=0; i<size; i++) { /* i=rank*/
2632 if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */
2633 send_rank[nsends] = i; nsends++;
2634 recv_rank[nrecvs++] = i;
2635 }
2636 }
2637 if (rank >= size - nleftover) { /* this proc is a leftover processor */
2638 i = size-nleftover-1;
2639 j = 0;
2640 while (j < nsubcomm - nleftover) {
2641 send_rank[nsends++] = i;
2642 i--; j++;
2643 }
2644 }
2645
2646 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2647 for (i=0; i<nleftover; i++) {
2648 recv_rank[nrecvs++] = size-nleftover+i;
2649 }
2650 }
2651
2652 /* allocate sbuf_j, sbuf_a */
2653 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2654 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2655 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2656 } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2657
2658 /* copy mat's local entries into the buffers */
2659 if (reuse == MAT_INITIAL_MATRIX) {
2660 rownz_max = 0;
2661 rptr = sbuf_j;
2662 cols = sbuf_j + rend-rstart + 1;
2663 vals = sbuf_a;
2664 rptr[0] = 0;
2665 for (i=0; i<rend-rstart; i++) {
2666 row = i + rstart;
2667 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2668 ncols = nzA + nzB;
2669 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2670 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2671 /* load the column indices for this row into cols */
2672 lwrite = 0;
2673 for (l=0; l<nzB; l++) {
2674 if ((ctmp = bmap[cworkB[l]]) < cstart) {
2675 vals[lwrite] = aworkB[l];
2676 cols[lwrite++] = ctmp;
2677 }
2678 }
2679 for (l=0; l<nzA; l++) {
2680 vals[lwrite] = aworkA[l];
2681 cols[lwrite++] = cstart + cworkA[l];
2682 }
2683 for (l=0; l<nzB; l++) {
2684 if ((ctmp = bmap[cworkB[l]]) >= cend) {
2685 vals[lwrite] = aworkB[l];
2686 cols[lwrite++] = ctmp;
2687 }
2688 }
2689 vals += ncols;
2690 cols += ncols;
2691 rptr[i+1] = rptr[i] + ncols;
2692 if (rownz_max < ncols) rownz_max = ncols;
2693 }
2694 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2695 } else { /* only copy matrix values into sbuf_a */
2696 rptr = sbuf_j;
2697 vals = sbuf_a;
2698 rptr[0] = 0;
2699 for (i=0;
i<rend-rstart; i++) { 2700 row = i + rstart; 2701 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2702 ncols = nzA + nzB; 2703 cworkB = b->j + b->i[i]; 2704 aworkA = a->a + a->i[i]; 2705 aworkB = b->a + b->i[i]; 2706 lwrite = 0; 2707 for (l=0; l<nzB; l++) { 2708 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2709 } 2710 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2711 for (l=0; l<nzB; l++) { 2712 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2713 } 2714 vals += ncols; 2715 rptr[i+1] = rptr[i] + ncols; 2716 } 2717 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2718 2719 /* send nzlocal to others, and recv other's nzlocal */ 2720 /*--------------------------------------------------*/ 2721 if (reuse == MAT_INITIAL_MATRIX) { 2722 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2723 2724 s_waits2 = s_waits3 + nsends; 2725 s_waits1 = s_waits2 + nsends; 2726 r_waits1 = s_waits1 + nsends; 2727 r_waits2 = r_waits1 + nrecvs; 2728 r_waits3 = r_waits2 + nrecvs; 2729 } else { 2730 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2731 2732 r_waits3 = s_waits3 + nsends; 2733 } 2734 2735 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2736 if (reuse == MAT_INITIAL_MATRIX) { 2737 /* get new tags to keep the communication clean */ 2738 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2739 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2740 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2741 2742 /* post receives of other's nzlocal */ 2743 for (i=0; i<nrecvs; i++) { 2744 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2745 } 2746 /* send nzlocal to others */ 2747 for (i=0; i<nsends; i++) { 2748 sbuf_nz[i] = nzlocal; 2749 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2750 } 2751 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2752 count = nrecvs; 2753 while (count) { 2754 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2755 2756 recv_rank[imdex] = recv_status.MPI_SOURCE; 2757 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2758 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2759 2760 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2761 2762 rbuf_nz[imdex] += i + 2; 2763 2764 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2765 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2766 count--; 2767 } 2768 /* wait on sends of nzlocal */ 2769 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2770 /* send mat->i,j to others, and recv from other's */ 2771 /*------------------------------------------------*/ 2772 for (i=0; i<nsends; i++) { 2773 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2774 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2775 } 2776 /* wait on receives of mat->i,j */ 2777 /*------------------------------*/ 2778 count = nrecvs; 2779 while (count) { 2780 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2781 if (recv_rank[imdex] != 
recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2782 count--; 2783 } 2784 /* wait on sends of mat->i,j */ 2785 /*---------------------------*/ 2786 if (nsends) { 2787 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2788 } 2789 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2790 2791 /* post receives, send and receive mat->a */ 2792 /*----------------------------------------*/ 2793 for (imdex=0; imdex<nrecvs; imdex++) { 2794 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2795 } 2796 for (i=0; i<nsends; i++) { 2797 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2798 } 2799 count = nrecvs; 2800 while (count) { 2801 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2802 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2803 count--; 2804 } 2805 if (nsends) { 2806 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2807 } 2808 2809 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2810 2811 /* create redundant matrix */ 2812 /*-------------------------*/ 2813 if (reuse == MAT_INITIAL_MATRIX) { 2814 /* compute rownz_max for preallocation */ 2815 for (imdex=0; imdex<nrecvs; imdex++) { 2816 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2817 rptr = rbuf_j[imdex]; 2818 for (i=0; i<j; i++) { 2819 ncols = rptr[i+1] - rptr[i]; 2820 if (rownz_max < ncols) rownz_max = ncols; 2821 } 2822 } 2823 2824 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2825 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2826 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2827 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2828 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2829 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2830 } else { 2831 C = *matredundant; 2832 } 2833 2834 /* insert local matrix entries */ 2835 rptr = sbuf_j; 2836 cols = sbuf_j + rend-rstart + 1; 2837 vals = sbuf_a; 2838 for (i=0; i<rend-rstart; i++) { 2839 row = i + rstart; 2840 ncols = rptr[i+1] - rptr[i]; 2841 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2842 vals += ncols; 2843 cols += ncols; 2844 } 2845 /* insert received matrix entries */ 2846 for (imdex=0; imdex<nrecvs; imdex++) { 2847 rstart = rowrange[recv_rank[imdex]]; 2848 rend = rowrange[recv_rank[imdex]+1]; 2849 rptr = rbuf_j[imdex]; 2850 cols = rbuf_j[imdex] + rend-rstart + 1; 2851 vals = rbuf_a[imdex]; 2852 for (i=0; i<rend-rstart; i++) { 2853 row = i + rstart; 2854 ncols = rptr[i+1] - rptr[i]; 2855 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2856 vals += ncols; 2857 cols += ncols; 2858 } 2859 } 2860 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2861 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2862 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2863 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2864 if (reuse == MAT_INITIAL_MATRIX) { 2865 PetscContainer container; 2866 *matredundant = C; 2867 /* create a supporting struct and attach it to C for reuse */ 2868 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2869 ierr = 
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2870     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2871     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2872     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2873     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2874
2875     redund->nzlocal   = nzlocal;
2876     redund->nsends    = nsends;
2877     redund->nrecvs    = nrecvs;
2878     redund->send_rank = send_rank;
2879     redund->recv_rank = recv_rank;
2880     redund->sbuf_nz   = sbuf_nz;
2881     redund->rbuf_nz   = rbuf_nz;
2882     redund->sbuf_j    = sbuf_j;
2883     redund->sbuf_a    = sbuf_a;
2884     redund->rbuf_j    = rbuf_j;
2885     redund->rbuf_a    = rbuf_a;
2886
2887     redund->Destroy = C->ops->destroy;
2888     C->ops->destroy = MatDestroy_MatRedundant;
2889   }
2890   PetscFunctionReturn(0);
2891 }
2892
2893 #undef __FUNCT__
2894 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2895 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2896 {
2897   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2898   PetscErrorCode ierr;
2899   PetscInt       i,*idxb = 0;
2900   PetscScalar    *va,*vb;
2901   Vec            vtmp;
2902
2903   PetscFunctionBegin;
2904   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2905   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2906   if (idx) {
2907     for (i=0; i<A->rmap->n; i++) {
2908       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2909     }
2910   }
2911
2912   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2913   if (idx) {
2914     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2915   }
2916   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2917   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2918
2919   for (i=0; i<A->rmap->n; i++) {
2920     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2921       va[i] = vb[i];
2922       if (idx) idx[i] = a->garray[idxb[i]];
2923     }
2924   }
2925
2926   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2927   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2928   ierr = PetscFree(idxb);CHKERRQ(ierr);
2929   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2930   PetscFunctionReturn(0);
2931 }
2932
2933 #undef __FUNCT__
2934 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2935 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2936 {
2937   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2938   PetscErrorCode ierr;
2939   PetscInt       i,*idxb = 0;
2940   PetscScalar    *va,*vb;
2941   Vec            vtmp;
2942
2943   PetscFunctionBegin;
2944   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2945   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2946   if (idx) {
2947     for (i=0; i<A->rmap->n; i++) {
2948       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2949     }
2950   }
2951
2952   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2953   if (idx) {
2954     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2955   }
2956   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2957   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2958
2959   for (i=0; i<A->rmap->n; i++) {
2960     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2961       va[i] = vb[i];
2962       if (idx) idx[i] = a->garray[idxb[i]];
2963     }
2964   }
2965
2966   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2967   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2968   ierr = PetscFree(idxb);CHKERRQ(ierr);
2969   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2970   PetscFunctionReturn(0);
2971 }
2972
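/*
   Usage sketch for the two routines above via the public interface (a sketch
   only; error checking omitted, and the names rmax and idx are illustrative):

     Vec      rmax;
     PetscInt *idx,m;

     MatGetLocalSize(A,&m,NULL);
     PetscMalloc(m*sizeof(PetscInt),&idx);
     MatGetVecs(A,NULL,&rmax);              -- left vector: one entry per local row
     MatGetRowMaxAbs(A,rmax,idx);           -- idx[i]: global column of the max in local row i
*/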
2973 #undef __FUNCT__
2974 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2975 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2976 {
2977   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
2978   PetscInt       n      = A->rmap->n;
2979   PetscInt       cstart = A->cmap->rstart;
2980   PetscInt       *cmap  = mat->garray;
2981   PetscInt       *diagIdx, *offdiagIdx;
2982   Vec            diagV, offdiagV;
2983   PetscScalar    *a, *diagA, *offdiagA;
2984   PetscInt       r;
2985   PetscErrorCode ierr;
2986
2987   PetscFunctionBegin;
2988   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2989   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2990   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2991   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2992   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2993   ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2994   ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2995   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2996   for (r = 0; r < n; ++r) {
2997     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2998       a[r]   = diagA[r];
2999       idx[r] = cstart + diagIdx[r];
3000     } else {
3001       a[r]   = offdiagA[r];
3002       idx[r] = cmap[offdiagIdx[r]];
3003     }
3004   }
3005   ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
3006   ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
3007   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3008   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3009   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3010   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3011   PetscFunctionReturn(0);
3012 }
3013
3014 #undef __FUNCT__
3015 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3016 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3017 {
3018   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
3019   PetscInt       n      = A->rmap->n;
3020   PetscInt       cstart = A->cmap->rstart;
3021   PetscInt       *cmap  = mat->garray;
3022   PetscInt       *diagIdx, *offdiagIdx;
3023   Vec            diagV, offdiagV;
3024   PetscScalar    *a, *diagA, *offdiagA;
3025   PetscInt       r;
3026   PetscErrorCode ierr;
3027
3028   PetscFunctionBegin;
3029   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3030   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3031   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3032   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
3033   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3034   ierr = VecGetArray(v, &a);CHKERRQ(ierr);
3035   ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
3036   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3037   for (r = 0; r < n; ++r) {
3038     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3039       a[r]   = diagA[r];
3040       idx[r] = cstart + diagIdx[r];
3041     } else {
3042       a[r]   = offdiagA[r];
3043       idx[r] = cmap[offdiagIdx[r]];
3044     }
3045   }
3046   ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
3047   ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
3048   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3049   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3050   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3051   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3052   PetscFunctionReturn(0);
3053 }
3054
3055 #undef __FUNCT__
3056 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3057 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3058 {
3059   PetscErrorCode ierr;
3060   Mat            *dummy;
3061
3062   PetscFunctionBegin;
3063   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3064   *newmat = *dummy;
3065   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3066   PetscFunctionReturn(0);
3067 }
3068
3069 extern PetscErrorCode
MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3070 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3073 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3074 { 3075 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3076 PetscErrorCode ierr; 3077 3078 PetscFunctionBegin; 3079 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3080 PetscFunctionReturn(0); 3081 } 3082 3083 #undef __FUNCT__ 3084 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3085 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3086 { 3087 PetscErrorCode ierr; 3088 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3089 3090 PetscFunctionBegin; 3091 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3092 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3093 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3094 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3095 PetscFunctionReturn(0); 3096 } 3097 3098 /* -------------------------------------------------------------------*/ 3099 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3100 MatGetRow_MPIAIJ, 3101 MatRestoreRow_MPIAIJ, 3102 MatMult_MPIAIJ, 3103 /* 4*/ MatMultAdd_MPIAIJ, 3104 MatMultTranspose_MPIAIJ, 3105 MatMultTransposeAdd_MPIAIJ, 3106 #if defined(PETSC_HAVE_PBGL) 3107 MatSolve_MPIAIJ, 3108 #else 3109 0, 3110 #endif 3111 0, 3112 0, 3113 /*10*/ 0, 3114 0, 3115 0, 3116 MatSOR_MPIAIJ, 3117 MatTranspose_MPIAIJ, 3118 /*15*/ MatGetInfo_MPIAIJ, 3119 MatEqual_MPIAIJ, 3120 MatGetDiagonal_MPIAIJ, 3121 MatDiagonalScale_MPIAIJ, 3122 MatNorm_MPIAIJ, 3123 /*20*/ MatAssemblyBegin_MPIAIJ, 3124 MatAssemblyEnd_MPIAIJ, 3125 MatSetOption_MPIAIJ, 3126 MatZeroEntries_MPIAIJ, 3127 /*24*/ MatZeroRows_MPIAIJ, 3128 0, 3129 #if defined(PETSC_HAVE_PBGL) 3130 0, 3131 #else 3132 0, 3133 #endif 3134 0, 3135 0, 3136 /*29*/ MatSetUp_MPIAIJ, 3137 #if defined(PETSC_HAVE_PBGL) 3138 0, 3139 #else 3140 0, 3141 #endif 3142 0, 3143 0, 3144 0, 3145 /*34*/ MatDuplicate_MPIAIJ, 3146 0, 3147 0, 3148 0, 3149 0, 3150 /*39*/ MatAXPY_MPIAIJ, 3151 MatGetSubMatrices_MPIAIJ, 3152 MatIncreaseOverlap_MPIAIJ, 3153 MatGetValues_MPIAIJ, 3154 MatCopy_MPIAIJ, 3155 /*44*/ MatGetRowMax_MPIAIJ, 3156 MatScale_MPIAIJ, 3157 0, 3158 0, 3159 MatZeroRowsColumns_MPIAIJ, 3160 /*49*/ MatSetRandom_MPIAIJ, 3161 0, 3162 0, 3163 0, 3164 0, 3165 /*54*/ MatFDColoringCreate_MPIAIJ, 3166 0, 3167 MatSetUnfactored_MPIAIJ, 3168 MatPermute_MPIAIJ, 3169 0, 3170 /*59*/ MatGetSubMatrix_MPIAIJ, 3171 MatDestroy_MPIAIJ, 3172 MatView_MPIAIJ, 3173 0, 3174 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3175 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3176 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3177 0, 3178 0, 3179 0, 3180 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3181 MatGetRowMinAbs_MPIAIJ, 3182 0, 3183 MatSetColoring_MPIAIJ, 3184 0, 3185 MatSetValuesAdifor_MPIAIJ, 3186 /*75*/ MatFDColoringApply_AIJ, 3187 0, 3188 0, 3189 0, 3190 MatFindZeroDiagonals_MPIAIJ, 3191 /*80*/ 0, 3192 0, 3193 0, 3194 /*83*/ MatLoad_MPIAIJ, 3195 0, 3196 0, 3197 0, 3198 0, 3199 0, 3200 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3201 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3202 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3203 MatPtAP_MPIAIJ_MPIAIJ, 3204 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3205 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3206 0, 3207 0, 3208 0, 3209 0, 3210 /*99*/ 0, 3211 0, 3212 0, 3213 MatConjugate_MPIAIJ, 3214 0, 3215 /*104*/MatSetValuesRow_MPIAIJ, 3216 MatRealPart_MPIAIJ, 3217 MatImaginaryPart_MPIAIJ, 3218 0, 3219 0, 3220 /*109*/0, 3221 MatGetRedundantMatrix_MPIAIJ, 3222 MatGetRowMin_MPIAIJ, 3223 0, 3224 0, 
3225 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3226 0, 3227 0, 3228 0, 3229 0, 3230 /*119*/0, 3231 0, 3232 0, 3233 0, 3234 MatGetMultiProcBlock_MPIAIJ, 3235 /*124*/MatFindNonzeroRows_MPIAIJ, 3236 MatGetColumnNorms_MPIAIJ, 3237 MatInvertBlockDiagonal_MPIAIJ, 3238 0, 3239 MatGetSubMatricesParallel_MPIAIJ, 3240 /*129*/0, 3241 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3242 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3243 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3244 0, 3245 /*134*/0, 3246 0, 3247 0, 3248 0, 3249 0, 3250 /*139*/0, 3251 0 3252 }; 3253 3254 /* ----------------------------------------------------------------------------------------*/ 3255 3256 #undef __FUNCT__ 3257 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3258 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3259 { 3260 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3261 PetscErrorCode ierr; 3262 3263 PetscFunctionBegin; 3264 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3265 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3266 PetscFunctionReturn(0); 3267 } 3268 3269 #undef __FUNCT__ 3270 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3271 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3272 { 3273 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3274 PetscErrorCode ierr; 3275 3276 PetscFunctionBegin; 3277 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3278 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3279 PetscFunctionReturn(0); 3280 } 3281 3282 #undef __FUNCT__ 3283 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3284 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3285 { 3286 Mat_MPIAIJ *b; 3287 PetscErrorCode ierr; 3288 3289 PetscFunctionBegin; 3290 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3291 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3292 b = (Mat_MPIAIJ*)B->data; 3293 3294 if (!B->preallocated) { 3295 /* Explicitly create 2 MATSEQAIJ matrices. 
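       b->A holds the square "diagonal" block (local rows by local columns)
       and b->B holds the rectangular "off-diagonal" block (local rows by all
       global columns); see the MatCreateAIJ() man page below for how the two
       blocks are used.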
*/ 3296 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3297 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3298 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3299 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3300 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3301 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3302 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3303 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3304 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3305 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3306 } 3307 3308 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3309 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3310 B->preallocated = PETSC_TRUE; 3311 PetscFunctionReturn(0); 3312 } 3313 3314 #undef __FUNCT__ 3315 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3316 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3317 { 3318 Mat mat; 3319 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3320 PetscErrorCode ierr; 3321 3322 PetscFunctionBegin; 3323 *newmat = 0; 3324 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3325 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3326 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3327 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3328 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3329 a = (Mat_MPIAIJ*)mat->data; 3330 3331 mat->factortype = matin->factortype; 3332 mat->rmap->bs = matin->rmap->bs; 3333 mat->cmap->bs = matin->cmap->bs; 3334 mat->assembled = PETSC_TRUE; 3335 mat->insertmode = NOT_SET_VALUES; 3336 mat->preallocated = PETSC_TRUE; 3337 3338 a->size = oldmat->size; 3339 a->rank = oldmat->rank; 3340 a->donotstash = oldmat->donotstash; 3341 a->roworiented = oldmat->roworiented; 3342 a->rowindices = 0; 3343 a->rowvalues = 0; 3344 a->getrowactive = PETSC_FALSE; 3345 3346 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3347 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3348 3349 if (oldmat->colmap) { 3350 #if defined(PETSC_USE_CTABLE) 3351 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3352 #else 3353 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3354 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3355 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3356 #endif 3357 } else a->colmap = 0; 3358 if (oldmat->garray) { 3359 PetscInt len; 3360 len = oldmat->B->cmap->n; 3361 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3362 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3363 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3364 } else a->garray = 0; 3365 3366 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3367 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3368 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3369 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3370 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3371 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3372 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3373 ierr = 
PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3374 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3375 *newmat = mat; 3376 PetscFunctionReturn(0); 3377 } 3378 3379 3380 3381 #undef __FUNCT__ 3382 #define __FUNCT__ "MatLoad_MPIAIJ" 3383 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3384 { 3385 PetscScalar *vals,*svals; 3386 MPI_Comm comm; 3387 PetscErrorCode ierr; 3388 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3389 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3390 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3391 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3392 PetscInt cend,cstart,n,*rowners,sizesset=1; 3393 int fd; 3394 PetscInt bs = 1; 3395 3396 PetscFunctionBegin; 3397 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3398 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3399 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3400 if (!rank) { 3401 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3402 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3403 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3404 } 3405 3406 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3407 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3408 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3409 3410 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3411 3412 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3413 M = header[1]; N = header[2]; 3414 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3415 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3416 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3417 3418 /* If global sizes are set, check if they are consistent with that given in the file */ 3419 if (sizesset) { 3420 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3421 } 3422 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3423 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3424 3425 /* determine ownership of all (block) rows */ 3426 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3427 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3428 else m = newMat->rmap->n; /* Set by user */ 3429 3430 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3431 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3432 3433 /* First process needs enough room for process with most rows */ 3434 if (!rank) { 3435 mmax = rowners[1]; 3436 for (i=2; i<=size; i++) { 3437 mmax = PetscMax(mmax, rowners[i]); 3438 } 3439 } else mmax = -1; /* unused, but compilers complain */ 3440 3441 rowners[0] = 0; 3442 for (i=2; i<=size; i++) { 3443 rowners[i] += rowners[i-1]; 3444 } 3445 rstart = rowners[rank]; 3446 rend = rowners[rank+1]; 3447 3448 /* distribute row lengths to all processors */ 3449 ierr = 
PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3450 if (!rank) { 3451 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3452 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3453 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3454 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3455 for (j=0; j<m; j++) { 3456 procsnz[0] += ourlens[j]; 3457 } 3458 for (i=1; i<size; i++) { 3459 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3460 /* calculate the number of nonzeros on each processor */ 3461 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3462 procsnz[i] += rowlengths[j]; 3463 } 3464 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3465 } 3466 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3467 } else { 3468 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3469 } 3470 3471 if (!rank) { 3472 /* determine max buffer needed and allocate it */ 3473 maxnz = 0; 3474 for (i=0; i<size; i++) { 3475 maxnz = PetscMax(maxnz,procsnz[i]); 3476 } 3477 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3478 3479 /* read in my part of the matrix column indices */ 3480 nz = procsnz[0]; 3481 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3482 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3483 3484 /* read in every one elses and ship off */ 3485 for (i=1; i<size; i++) { 3486 nz = procsnz[i]; 3487 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3488 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3489 } 3490 ierr = PetscFree(cols);CHKERRQ(ierr); 3491 } else { 3492 /* determine buffer space needed for message */ 3493 nz = 0; 3494 for (i=0; i<m; i++) { 3495 nz += ourlens[i]; 3496 } 3497 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3498 3499 /* receive message of column indices*/ 3500 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3501 } 3502 3503 /* determine column ownership if matrix is not square */ 3504 if (N != M) { 3505 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3506 else n = newMat->cmap->n; 3507 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3508 cstart = cend - n; 3509 } else { 3510 cstart = rstart; 3511 cend = rend; 3512 n = cend - cstart; 3513 } 3514 3515 /* loop over local rows, determining number of off diagonal entries */ 3516 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3517 jj = 0; 3518 for (i=0; i<m; i++) { 3519 for (j=0; j<ourlens[i]; j++) { 3520 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3521 jj++; 3522 } 3523 } 3524 3525 for (i=0; i<m; i++) { 3526 ourlens[i] -= offlens[i]; 3527 } 3528 if (!sizesset) { 3529 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3530 } 3531 3532 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3533 3534 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3535 3536 for (i=0; i<m; i++) { 3537 ourlens[i] += offlens[i]; 3538 } 3539 3540 if (!rank) { 3541 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3542 3543 /* read in my part of the matrix numerical values */ 3544 nz = procsnz[0]; 3545 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3546 3547 /* insert into matrix */ 3548 jj = rstart; 3549 smycols = mycols; 3550 svals = vals; 3551 for (i=0; i<m; i++) { 3552 ierr = 
MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3553       smycols += ourlens[i];
3554       svals   += ourlens[i];
3555       jj++;
3556     }
3557
3558     /* read in other processors and ship out */
3559     for (i=1; i<size; i++) {
3560       nz   = procsnz[i];
3561       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3562       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3563     }
3564     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3565   } else {
3566     /* receive numeric values */
3567     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3568
3569     /* receive message of values */
3570     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3571
3572     /* insert into matrix */
3573     jj      = rstart;
3574     smycols = mycols;
3575     svals   = vals;
3576     for (i=0; i<m; i++) {
3577       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3578       smycols += ourlens[i];
3579       svals   += ourlens[i];
3580       jj++;
3581     }
3582   }
3583   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3584   ierr = PetscFree(vals);CHKERRQ(ierr);
3585   ierr = PetscFree(mycols);CHKERRQ(ierr);
3586   ierr = PetscFree(rowners);CHKERRQ(ierr);
3587   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3588   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3589   PetscFunctionReturn(0);
3590 }
3591
3592 #undef __FUNCT__
3593 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3594 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3595 {
3596   PetscErrorCode ierr;
3597   IS             iscol_local;
3598   PetscInt       csize;
3599
3600   PetscFunctionBegin;
3601   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3602   if (call == MAT_REUSE_MATRIX) {
3603     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3604     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3605   } else {
3606     PetscInt cbs;
3607     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3608     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3609     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3610   }
3611   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3612   if (call == MAT_INITIAL_MATRIX) {
3613     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3614     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3615   }
3616   PetscFunctionReturn(0);
3617 }
3618
3619 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3620 #undef __FUNCT__
3621 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3622 /*
3623     Not great since it makes two copies of the submatrix: first each process
3624   builds a sequential SeqAIJ copy of its piece, and then the final parallel matrix
3625   is assembled by concatenating those local pieces. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3626
3627     Note: This requires a sequential iscol with all indices.
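
    A typical call pattern through the public interface is (a sketch; error
    checking omitted):

      MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&submat);
      ... change the numerical values of mat, keeping its nonzero pattern ...
      MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&submat);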
3628 */ 3629 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3630 { 3631 PetscErrorCode ierr; 3632 PetscMPIInt rank,size; 3633 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3634 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3635 PetscBool allcolumns, colflag; 3636 Mat M,Mreuse; 3637 MatScalar *vwork,*aa; 3638 MPI_Comm comm; 3639 Mat_SeqAIJ *aij; 3640 3641 PetscFunctionBegin; 3642 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3643 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3644 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3645 3646 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3647 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3648 if (colflag && ncol == mat->cmap->N) { 3649 allcolumns = PETSC_TRUE; 3650 } else { 3651 allcolumns = PETSC_FALSE; 3652 } 3653 if (call == MAT_REUSE_MATRIX) { 3654 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3655 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3656 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3657 } else { 3658 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3659 } 3660 3661 /* 3662 m - number of local rows 3663 n - number of columns (same on all processors) 3664 rstart - first row in new global matrix generated 3665 */ 3666 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3667 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3668 if (call == MAT_INITIAL_MATRIX) { 3669 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3670 ii = aij->i; 3671 jj = aij->j; 3672 3673 /* 3674 Determine the number of non-zeros in the diagonal and off-diagonal 3675 portions of the matrix in order to do correct preallocation 3676 */ 3677 3678 /* first get start and end of "diagonal" columns */ 3679 if (csize == PETSC_DECIDE) { 3680 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3681 if (mglobal == n) { /* square matrix */ 3682 nlocal = m; 3683 } else { 3684 nlocal = n/size + ((n % size) > rank); 3685 } 3686 } else { 3687 nlocal = csize; 3688 } 3689 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3690 rstart = rend - nlocal; 3691 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3692 3693 /* next, compute all the lengths */ 3694 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3695 olens = dlens + m; 3696 for (i=0; i<m; i++) { 3697 jend = ii[i+1] - ii[i]; 3698 olen = 0; 3699 dlen = 0; 3700 for (j=0; j<jend; j++) { 3701 if (*jj < rstart || *jj >= rend) olen++; 3702 else dlen++; 3703 jj++; 3704 } 3705 olens[i] = olen; 3706 dlens[i] = dlen; 3707 } 3708 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3709 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3710 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3711 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3712 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3713 ierr = PetscFree(dlens);CHKERRQ(ierr); 3714 } else { 3715 PetscInt ml,nl; 3716 3717 M = *newmat; 3718 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3719 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3720 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3721 /* 
3722        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3723     rather than the slower MatSetValues().
3724   */
3725     M->was_assembled = PETSC_TRUE;
3726     M->assembled     = PETSC_FALSE;
3727   }
3728   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3729   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3730   ii   = aij->i;
3731   jj   = aij->j;
3732   aa   = aij->a;
3733   for (i=0; i<m; i++) {
3734     row = rstart + i;
3735     nz  = ii[i+1] - ii[i];
3736     cwork = jj; jj += nz;
3737     vwork = aa; aa += nz;
3738     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3739   }
3740
3741   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3742   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3743   *newmat = M;
3744
3745   /* save submatrix used in processor for next request */
3746   if (call == MAT_INITIAL_MATRIX) {
3747     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3748     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3749   }
3750   PetscFunctionReturn(0);
3751 }
3752
3753 #undef __FUNCT__
3754 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3755 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3756 {
3757   PetscInt       m,cstart, cend,j,nnz,i,d;
3758   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3759   const PetscInt *JJ;
3760   PetscScalar    *values;
3761   PetscErrorCode ierr;
3762
3763   PetscFunctionBegin;
3764   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3765
3766   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3767   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3768   m      = B->rmap->n;
3769   cstart = B->cmap->rstart;
3770   cend   = B->cmap->rend;
3771   rstart = B->rmap->rstart;
3772
3773   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3774
3775 #if defined(PETSC_USE_DEBUG)
3776   for (i=0; i<m; i++) {
3777     nnz = Ii[i+1]- Ii[i];
3778     JJ  = J + Ii[i];
3779     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3780     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3781     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3782   }
3783 #endif
3784
3785   for (i=0; i<m; i++) {
3786     nnz     = Ii[i+1]- Ii[i];
3787     JJ      = J + Ii[i];
3788     nnz_max = PetscMax(nnz_max,nnz);
3789     d       = 0;
3790     for (j=0; j<nnz; j++) {
3791       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3792     }
3793     d_nnz[i] = d;
3794     o_nnz[i] = nnz - d;
3795   }
3796   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3797   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3798
3799   if (v) values = (PetscScalar*)v;
3800   else {
3801     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3802     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3803   }
3804
3805   for (i=0; i<m; i++) {
3806     ii   = i + rstart;
3807     nnz  = Ii[i+1]- Ii[i];
3808     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3809   }
3810   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3811   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3812
3813   if (!v) {
3814     ierr = PetscFree(values);CHKERRQ(ierr);
3815   }
3816   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3817   PetscFunctionReturn(0);
3818 }
3819
3820 #undef __FUNCT__
3821 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3822 /*@
3823    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3824    (the default parallel PETSc format).
3825
3826    Collective on MPI_Comm
3827
3828    Input Parameters:
3829 +  B - the matrix
3830 .  i - the indices into j for the start of each local row (starts with zero)
3831 .  j - the column indices for each local row (starts with zero)
3832 -  v - optional values in the matrix
3833
3834    Level: developer
3835
3836    Notes:
3837        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3838      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3839      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3840
3841      The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.
3842
3843        The format which is used for the sparse matrix input is equivalent to a
3844     row-major ordering, i.e., for the following matrix, the input data expected is
3845     as shown:
3846
3847         1 0 0
3848         2 0 3     P0
3849        -------
3850         4 5 6     P1
3851
3852      Process0 [P0]: rows_owned=[0,1]
3853         i =  {0,1,3}  [size = nrow+1 = 2+1]
3854         j =  {0,0,2}  [size = nz = 3]
3855         v =  {1,2,3}  [size = nz = 3]
3856
3857      Process1 [P1]: rows_owned=[2]
3858         i =  {0,3}    [size = nrow+1 = 1+1]
3859         j =  {0,1,2}  [size = nz = 3]
3860         v =  {4,5,6}  [size = nz = 3]
3861
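     For this layout, process 0 would make the calls below (a minimal sketch;
     error checking is omitted and the sizes are written out explicitly --
     process 1 passes its own arrays with 1 local row):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};

     MatCreate(comm,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve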
3862 .keywords: matrix, aij, compressed row, sparse, parallel
3863
3864 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3865           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3866 @*/
3867 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3868 {
3869   PetscErrorCode ierr;
3870
3871   PetscFunctionBegin;
3872   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3873   PetscFunctionReturn(0);
3874 }
3875
3876 #undef __FUNCT__
3877 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3878 /*@C
3879    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3880    (the default parallel PETSc format).  For good matrix assembly performance
3881    the user should preallocate the matrix storage by setting the parameters
3882    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3883    performance can be increased by more than a factor of 50.
3884
3885    Collective on MPI_Comm
3886
3887    Input Parameters:
3888 +  A - the matrix
3889 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3890            (same value is used for all local rows)
3891 .  d_nnz - array containing the number of nonzeros in the various rows of the
3892            DIAGONAL portion of the local submatrix (possibly different for each row)
3893            or NULL, if d_nz is used to specify the nonzero structure.
3894            The size of this array is equal to the number of local rows, i.e., 'm'.
3895            For matrices that will be factored, you must leave room for (and set)
3896            the diagonal entry even if it is zero.
3897 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3898            submatrix (same value is used for all local rows).
3899 -  o_nnz - array containing the number of nonzeros in the various rows of the
3900            OFF-DIAGONAL portion of the local submatrix (possibly different for
3901            each row) or NULL, if o_nz is used to specify the nonzero
3902            structure. The size of this array is equal to the number
3903            of local rows, i.e., 'm'.
3904
3905    If the *_nnz parameter is given then the *_nz parameter is ignored.
3906
3907    The AIJ format (also called the Yale sparse matrix format or
3908    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3909    storage.  The stored row and column indices begin with zero.
3910    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3911
3912    The parallel matrix is partitioned such that the first m0 rows belong to
3913    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3914    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3915
3916    The DIAGONAL portion of the local submatrix of a processor can be defined
3917    as the submatrix which is obtained by extracting the part corresponding to
3918    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3919    first row that belongs to the processor, r2 is the last row belonging to
3920    this processor, and c1-c2 is the range of indices of the local part of a
3921    vector suitable for applying the matrix to. This is an mxn matrix. In the
3922    common case of a square matrix, the row and column ranges are the same and
3923    the DIAGONAL part is also square. The remaining portion of the local
3924    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3925
3926    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3927
3928    You can call MatGetInfo() to get information on how effective the preallocation was;
3929    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3930    You can also run with the option -info and look for messages with the string
3931    malloc in them to see if additional memory allocation was needed.
3932
3933    Example usage:
3934
3935    Consider the following 8x8 matrix with 34 non-zero values, that is
3936    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3937    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3938    as follows:
3939
3940 .vb
3941             1  2  0  |  0  3  0  |  0  4
3942     Proc0   0  5  6  |  7  0  0  |  8  0
3943             9  0 10  | 11  0  0  | 12  0
3944     -------------------------------------
3945            13  0 14  | 15 16 17  |  0  0
3946     Proc1   0 18  0  | 19 20 21  |  0  0
3947             0  0  0  | 22 23  0  | 24  0
3948     -------------------------------------
3949     Proc2  25 26 27  |  0  0 28  | 29  0
3950            30  0  0  | 31 32 33  |  0 34
3951 .ve
3952
3953    This can be represented as a collection of submatrices as:
3954
3955 .vb
3956       A B C
3957       D E F
3958       G H I
3959 .ve
3960
3961    Where the submatrices A,B,C are owned by proc0, D,E,F are
3962    owned by proc1, G,H,I are owned by proc2.
3963
3964    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3965    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3966    The 'M','N' parameters are 8,8, and have the same values on all procs.
3967
3968    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3969    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3970    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3971    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3972    part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
3973    matrix, and [DF] as another SeqAIJ matrix.
3974
3975    When d_nz, o_nz parameters are specified, d_nz storage elements are
3976    allocated for every row of the local diagonal submatrix, and o_nz
3977    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3978    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3979    the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3980    In this case, the values of d_nz,o_nz are:
3981 .vb
3982      proc0 : d_nz = 2, o_nz = 2
3983      proc1 : d_nz = 3, o_nz = 2
3984      proc2 : d_nz = 1, o_nz = 4
3985 .ve
3986    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3987    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3988    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3989    34 values.
3990
3991    When d_nnz, o_nnz parameters are specified, the storage is specified
3992    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3993    In the above case the values for d_nnz,o_nnz are:
3994 .vb
3995      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3996      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3997      proc2: d_nnz = [1,1] and o_nnz = [4,4]
3998 .ve
3999    Here the space allocated is the sum of all of the above values, i.e., 34, and
4000    hence the preallocation is perfect.
4001
4002    Level: intermediate
4003
4004 .keywords: matrix, aij, compressed row, sparse, parallel
4005
4006 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4007           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4008 @*/
4009 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4010 {
4011   PetscErrorCode ierr;
4012
4013   PetscFunctionBegin;
4014   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4015   PetscValidType(B,1);
4016   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4017   PetscFunctionReturn(0);
4018 }
4019
4020 #undef __FUNCT__
4021 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4022 /*@
4023    MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4024    rows in standard CSR format.
4025
4026    Collective on MPI_Comm
4027
4028    Input Parameters:
4029 +  comm - MPI communicator
4030 .  m - number of local rows (Cannot be PETSC_DECIDE)
4031 .  n - This value should be the same as the local size used in creating the
4032        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4033        calculated if N is given) For square matrices n is almost always m.
4034 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4035 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4036 .  i - row indices
4037 .  j - column indices
4038 -  a - matrix values
4039
4040    Output Parameter:
4041 .  mat - the matrix
4042
4043    Level: intermediate
4044
4045    Notes:
4046        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4047      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4048      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4049
4050      The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.
4051
4052        The format which is used for the sparse matrix input is equivalent to a
4053     row-major ordering, i.e., for the following matrix, the input data expected is
4054     as shown:
4055
4056         1 0 0
4057         2 0 3     P0
4058        -------
4059         4 5 6     P1
4060
4061      Process0 [P0]: rows_owned=[0,1]
4062         i =  {0,1,3}  [size = nrow+1 = 2+1]
4063         j =  {0,0,2}  [size = nz = 3]
4064         v =  {1,2,3}  [size = nz = 3]
4065
4066      Process1 [P1]: rows_owned=[2]
4067         i =  {0,3}    [size = nrow+1 = 1+1]
4068         j =  {0,1,2}  [size = nz = 3]
4069         v =  {4,5,6}  [size = nz = 3]
4070
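     With these arrays the whole matrix is created in one call; on process 0
     this reads as follows (a minimal sketch; error checking is omitted and
     process 1 passes m = 1 and its own arrays):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};

     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve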
4071 .keywords: matrix, aij, compressed row, sparse, parallel
4072
4073 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4074           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4075 @*/
4076 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4077 {
4078   PetscErrorCode ierr;
4079
4080   PetscFunctionBegin;
4081   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4082   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4083   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4084   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4085   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4086   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4087   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4088   PetscFunctionReturn(0);
4089 }
4090
4091 #undef __FUNCT__
4092 #define __FUNCT__ "MatCreateAIJ"
4093 /*@C
4094    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4095    (the default parallel PETSc format).  For good matrix assembly performance
4096    the user should preallocate the matrix storage by setting the parameters
4097    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4098    performance can be increased by more than a factor of 50.
4099
4100    Collective on MPI_Comm
4101
4102    Input Parameters:
4103 +  comm - MPI communicator
4104 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4105        This value should be the same as the local size used in creating the
4106        y vector for the matrix-vector product y = Ax.
4107 .  n - This value should be the same as the local size used in creating the
4108        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4109        calculated if N is given) For square matrices n is almost always m.
4110 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4111 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4112 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4113            (same value is used for all local rows)
4114 .  d_nnz - array containing the number of nonzeros in the various rows of the
4115            DIAGONAL portion of the local submatrix (possibly different for each row)
4116            or NULL, if d_nz is used to specify the nonzero structure.
4117            The size of this array is equal to the number of local rows, i.e., 'm'.
4118 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4119            submatrix (same value is used for all local rows).
4120 -  o_nnz - array containing the number of nonzeros in the various rows of the
4121            OFF-DIAGONAL portion of the local submatrix (possibly different for
4122            each row) or NULL, if o_nz is used to specify the nonzero
4123            structure. The size of this array is equal to the number
4124            of local rows, i.e., 'm'.
4125
4126    Output Parameter:
4127 .  A - the matrix
4128
4129    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4130    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4131    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4132
4133    Notes:
4134    If the *_nnz parameter is given then the *_nz parameter is ignored.
4135
4136    The m,n,M,N parameters specify the size of the matrix, and its partitioning across
4137    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4138    storage requirements for this matrix.
4139
4140    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4141    processor then it must be used on all processors that share the object for
4142    that argument.
4143
4144    The user MUST specify either the local or global matrix dimensions
4145    (possibly both).
4146
4147    The parallel matrix is partitioned across processors such that the
4148    first m0 rows belong to process 0, the next m1 rows belong to
4149    process 1, the next m2 rows belong to process 2 etc., where
4150    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4151    values corresponding to an [m x N] submatrix.
4152
4153    The columns are logically partitioned with the n0 columns belonging
4154    to 0th partition, the next n1 columns belonging to the next
4155    partition etc., where n0,n1,n2... are the input parameter 'n'.
4156
4157    The DIAGONAL portion of the local submatrix on any given processor
4158    is the submatrix corresponding to the rows and columns m,n
4159    corresponding to the given processor, i.e., the diagonal matrix on
4160    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
4161    etc. The remaining portion of the local submatrix [m x (N-n)]
4162    constitutes the OFF-DIAGONAL portion. The example below better
4163    illustrates this concept.
4164
4165    For a square global matrix we define each processor's diagonal portion
4166    to be its local rows and the corresponding columns (a square submatrix);
4167    each processor's off-diagonal portion encompasses the remainder of the
4168    local matrix (a rectangular submatrix).
4169
4170    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4171
4172    When calling this routine with a single process communicator, a matrix of
4173    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4174    type of communicator, use the construction mechanism:
4175      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4176
4177    By default, this format uses inodes (identical nodes) when possible.
4178    We search for consecutive rows with the same nonzero structure, thereby
4179    reusing matrix information to achieve increased efficiency.
4180
4181    Options Database Keys:
4182 +  -mat_no_inode  - Do not use inodes
4183 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4184 -  -mat_aij_oneindex - Internally use indexing starting at 1
4185         rather than 0.  Note that when calling MatSetValues(),
4186         the user still MUST index entries starting at 0!
4187
4188
4189    Example usage:
4190
4191    Consider the following 8x8 matrix with 34 non-zero values, that is
4192    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4193    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4194    as follows:
4195
4196 .vb
4197             1  2  0  |  0  3  0  |  0  4
4198     Proc0   0  5  6  |  7  0  0  |  8  0
4199             9  0 10  | 11  0  0  | 12  0
4200     -------------------------------------
4201            13  0 14  | 15 16 17  |  0  0
4202     Proc1   0 18  0  | 19 20 21  |  0  0
4203             0  0  0  | 22 23  0  | 24  0
4204     -------------------------------------
4205     Proc2  25 26 27  |  0  0 28  | 29  0
4206            30  0  0  | 31 32 33  |  0 34
4207 .ve
4208
4209    This can be represented as a collection of submatrices as:
4210
4211 .vb
4212       A B C
4213       D E F
4214       G H I
4215 .ve
4216
4217    Where the submatrices A,B,C are owned by proc0, D,E,F are
4218    owned by proc1, G,H,I are owned by proc2.
4219
4220    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4221    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4222    The 'M','N' parameters are 8,8, and have the same values on all procs.
4223
4224    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4225    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4226    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4227    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4228    part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
4229    matrix, and [DF] as another SeqAIJ matrix.
4230
4231    When d_nz, o_nz parameters are specified, d_nz storage elements are
4232    allocated for every row of the local diagonal submatrix, and o_nz
4233    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4234    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4235    the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4236    In this case, the values of d_nz,o_nz are:
4237 .vb
4238      proc0 : d_nz = 2, o_nz = 2
4239      proc1 : d_nz = 3, o_nz = 2
4240      proc2 : d_nz = 1, o_nz = 4
4241 .ve
4242    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4243    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4244    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4245    34 values.
4246
4247    When d_nnz, o_nnz parameters are specified, the storage is specified
4248    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4249    In the above case the values for d_nnz,o_nnz are:
4250 .vb
4251      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4252      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4253      proc2: d_nnz = [1,1] and o_nnz = [4,4]
4254 .ve
4255    Here the space allocated is the sum of all of the above values, i.e., 34, and
4256    hence the preallocation is perfect.
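
   For this example the matrix could be created and filled as follows (a
   minimal sketch; error checking is omitted and nc, cols and vals stand
   for the caller's data for each row):

.vb
     MatCreateAIJ(comm,m,n,M,N,0,d_nnz,0,o_nnz,&A);
     MatGetOwnershipRange(A,&rstart,&rend);
     for (i=rstart; i<rend; i++) {
       MatSetValues(A,1,&i,nc,cols,vals,INSERT_VALUES);
     }
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve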
4257 4258 Level: intermediate 4259 4260 .keywords: matrix, aij, compressed row, sparse, parallel 4261 4262 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4263 MPIAIJ, MatCreateMPIAIJWithArrays() 4264 @*/ 4265 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4266 { 4267 PetscErrorCode ierr; 4268 PetscMPIInt size; 4269 4270 PetscFunctionBegin; 4271 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4272 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4273 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4274 if (size > 1) { 4275 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4276 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4277 } else { 4278 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4279 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4280 } 4281 PetscFunctionReturn(0); 4282 } 4283 4284 #undef __FUNCT__ 4285 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4286 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4287 { 4288 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4289 4290 PetscFunctionBegin; 4291 *Ad = a->A; 4292 *Ao = a->B; 4293 *colmap = a->garray; 4294 PetscFunctionReturn(0); 4295 } 4296 4297 #undef __FUNCT__ 4298 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4299 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4300 { 4301 PetscErrorCode ierr; 4302 PetscInt i; 4303 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4304 4305 PetscFunctionBegin; 4306 if (coloring->ctype == IS_COLORING_GLOBAL) { 4307 ISColoringValue *allcolors,*colors; 4308 ISColoring ocoloring; 4309 4310 /* set coloring for diagonal portion */ 4311 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4312 4313 /* set coloring for off-diagonal portion */ 4314 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4315 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4316 for (i=0; i<a->B->cmap->n; i++) { 4317 colors[i] = allcolors[a->garray[i]]; 4318 } 4319 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4320 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4321 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4322 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4323 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4324 ISColoringValue *colors; 4325 PetscInt *larray; 4326 ISColoring ocoloring; 4327 4328 /* set coloring for diagonal portion */ 4329 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4330 for (i=0; i<a->A->cmap->n; i++) { 4331 larray[i] = i + A->cmap->rstart; 4332 } 4333 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4334 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4335 for (i=0; i<a->A->cmap->n; i++) { 4336 colors[i] = coloring->colors[larray[i]]; 4337 } 4338 ierr = PetscFree(larray);CHKERRQ(ierr); 4339 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4340 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4341 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4342 4343 /* set coloring for off-diagonal portion */ 4344 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4345 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4346 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4347 for (i=0; i<a->B->cmap->n; i++) { 4348 colors[i] = coloring->colors[larray[i]]; 4349 } 4350 ierr = PetscFree(larray);CHKERRQ(ierr); 4351 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4352 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4353 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4354 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4355 PetscFunctionReturn(0); 4356 } 4357 4358 #undef __FUNCT__ 4359 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4360 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4361 { 4362 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4363 PetscErrorCode ierr; 4364 4365 PetscFunctionBegin; 4366 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4367 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4368 PetscFunctionReturn(0); 4369 } 4370 4371 #undef __FUNCT__ 4372 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4373 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4374 { 4375 PetscErrorCode ierr; 4376 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4377 PetscInt *indx; 4378 4379 PetscFunctionBegin; 4380 /* This routine will ONLY return MPIAIJ type matrix */ 4381 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4382 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4383 if (n == PETSC_DECIDE) { 4384 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4385 } 4386 /* Check sum(n) = N */ 4387 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4388 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4389 4390 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4391 rstart -= m; 4392 4393 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4394 for (i=0; i<m; i++) { 4395 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4396 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4397 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4398 } 4399 4400 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4401 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4402 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4403 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4404 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4405 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4406 PetscFunctionReturn(0); 4407 } 4408 4409 #undef __FUNCT__ 4410 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4411 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4412 { 4413 PetscErrorCode ierr; 4414 PetscInt m,N,i,rstart,nnz,Ii; 4415 PetscInt *indx; 4416 PetscScalar *values; 4417 4418 PetscFunctionBegin; 4419 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4420 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4421 for (i=0; i<m; i++) { 4422 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4423 Ii = i + rstart; 4424 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4425 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4426 } 4427 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4428 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4429 PetscFunctionReturn(0); 4430 } 4431 4432 #undef __FUNCT__ 4433 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4434 /*@ 4435 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4436 matrices from each processor 4437 4438 Collective on MPI_Comm 4439 4440 Input Parameters: 4441 + comm - the communicator the parallel matrix will live on 4442 . inmat - the input sequential matrix (one per process) 4443 . n - number of local columns (or PETSC_DECIDE) 4444 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4445 4446 Output Parameter: 4447 . outmat - the parallel matrix generated 4448 4449 Level: advanced 4450 4451 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4452 4453 @*/ 4454 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4455 { 4456 PetscErrorCode ierr; 4457 4458 PetscFunctionBegin; 4459 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4460 if (scall == MAT_INITIAL_MATRIX) { 4461 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4462 } 4463 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4464 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4465 PetscFunctionReturn(0); 4466 } 4467 4468 #undef __FUNCT__ 4469 #define __FUNCT__ "MatFileSplit" 4470 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4471 { 4472 PetscErrorCode ierr; 4473 PetscMPIInt rank; 4474 PetscInt m,N,i,rstart,nnz; 4475 size_t len; 4476 const PetscInt *indx; 4477 PetscViewer out; 4478 char *name; 4479 Mat B; 4480 const PetscScalar *values; 4481 4482 PetscFunctionBegin; 4483 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4484 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4485 /* Should this be the type of the diagonal block of A?
*/ 4486 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4487 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4488 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4489 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4490 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4491 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4492 for (i=0; i<m; i++) { 4493 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4494 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4495 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4496 } 4497 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4498 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4499 4500 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4501 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4502 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4503 sprintf(name,"%s.%d",outfile,rank); 4504 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4505 ierr = PetscFree(name);CHKERRQ(ierr); 4506 ierr = MatView(B,out);CHKERRQ(ierr); 4507 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4508 ierr = MatDestroy(&B);CHKERRQ(ierr); 4509 PetscFunctionReturn(0); 4510 } 4511 4512 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4513 #undef __FUNCT__ 4514 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4515 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4516 { 4517 PetscErrorCode ierr; 4518 Mat_Merge_SeqsToMPI *merge; 4519 PetscContainer container; 4520 4521 PetscFunctionBegin; 4522 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4523 if (container) { 4524 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4525 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4526 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4527 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4528 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4529 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4530 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4531 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4532 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4533 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4534 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4535 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4536 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4537 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4538 ierr = PetscFree(merge);CHKERRQ(ierr); 4539 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4540 } 4541 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4542 PetscFunctionReturn(0); 4543 } 4544 4545 #include <../src/mat/utils/freespace.h> 4546 #include <petscbt.h> 4547 4548 #undef __FUNCT__ 4549 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4550 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4551 { 4552 PetscErrorCode ierr; 4553 MPI_Comm comm; 4554 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4555 PetscMPIInt size,rank,taga,*len_s; 4556 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4557 PetscInt proc,m; 4558 PetscInt **buf_ri,**buf_rj; 4559 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4560 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4561 MPI_Request *s_waits,*r_waits; 4562 MPI_Status *status; 4563 MatScalar *aa=a->a; 4564 MatScalar **abuf_r,*ba_i; 4565 Mat_Merge_SeqsToMPI *merge; 4566 PetscContainer container; 4567 4568 PetscFunctionBegin; 4569 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4570 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4571 4572 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4573 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4574 4575 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4576 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4577 4578 bi = merge->bi; 4579 bj = merge->bj; 4580 buf_ri = merge->buf_ri; 4581 buf_rj = merge->buf_rj; 4582 4583 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4584 owners = merge->rowmap->range; 4585 len_s = merge->len_s; 4586 4587 /* send and recv matrix values */ 4588 /*-----------------------------*/ 4589 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4590 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4591 4592 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4593 for (proc=0,k=0; proc<size; proc++) { 4594 if (!len_s[proc]) continue; 4595 i = owners[proc]; 4596 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4597 k++; 4598 } 4599 4600 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4601 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4602 ierr = PetscFree(status);CHKERRQ(ierr); 4603 4604 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4605 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4606 4607 /* insert mat values of mpimat */ 4608 /*----------------------------*/ 4609 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4610 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4611 4612 for (k=0; k<merge->nrecv; k++) { 4613 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4614 nrows = *(buf_ri_k[k]); 4615 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 4616 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */ 4617 } 4618 4619 /* set values of ba */ 4620 m = merge->rowmap->n; 4621 for (i=0; i<m; i++) { 4622 arow = owners[rank] + i; 4623 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4624 bnzi = bi[i+1] - bi[i]; 4625 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4626 4627 /* add local non-zero vals of this proc's seqmat into ba */ 4628 anzi = ai[arow+1] - ai[arow]; 4629 aj = a->j + ai[arow]; 4630 aa = a->a + ai[arow]; 4631 nextaj = 0; 4632 for (j=0; nextaj<anzi; j++) { 4633 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4634 ba_i[j] += aa[nextaj++]; 4635 } 4636 } 4637 4638 /* add received vals into ba */ 4639 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4640 /* i-th row */ 4641 if (i == *nextrow[k]) { 4642 anzi = *(nextai[k]+1) - *nextai[k]; 4643 aj = buf_rj[k] + *(nextai[k]); 4644 aa = abuf_r[k] + *(nextai[k]); 4645 nextaj = 0; 4646 for (j=0; nextaj<anzi; j++) { 4647 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4648 ba_i[j] += aa[nextaj++]; 4649 } 4650 } 4651 nextrow[k]++; nextai[k]++; 4652 } 4653 } 4654 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4655 } 4656 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4657 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4658 4659 ierr =
PetscFree(abuf_r[0]);CHKERRQ(ierr); 4660 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4661 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4662 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4663 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4664 PetscFunctionReturn(0); 4665 } 4666 4667 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4668 4669 #undef __FUNCT__ 4670 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4671 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4672 { 4673 PetscErrorCode ierr; 4674 Mat B_mpi; 4675 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4676 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4677 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4678 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4679 PetscInt len,proc,*dnz,*onz,bs,cbs; 4680 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4681 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4682 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4683 MPI_Status *status; 4684 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4685 PetscBT lnkbt; 4686 Mat_Merge_SeqsToMPI *merge; 4687 PetscContainer container; 4688 4689 PetscFunctionBegin; 4690 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4691 4692 /* make sure it is a PETSc comm */ 4693 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4694 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4695 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4696 4697 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4698 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4699 4700 /* determine row ownership */ 4701 /*---------------------------------------------------------*/ 4702 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4703 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4704 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4705 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4706 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4707 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4708 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4709 4710 m = merge->rowmap->n; 4711 owners = merge->rowmap->range; 4712 4713 /* determine the number of messages to send, their lengths */ 4714 /*---------------------------------------------------------*/ 4715 len_s = merge->len_s; 4716 4717 len = 0; /* length of buf_si[] */ 4718 merge->nsend = 0; 4719 for (proc=0; proc<size; proc++) { 4720 len_si[proc] = 0; 4721 if (proc == rank) { 4722 len_s[proc] = 0; 4723 } else { 4724 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4725 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4726 } 4727 if (len_s[proc]) { 4728 merge->nsend++; 4729 nrows = 0; 4730 for (i=owners[proc]; i<owners[proc+1]; i++) { 4731 if (ai[i+1] > ai[i]) nrows++; 4732 } 4733 len_si[proc] = 2*(nrows+1); 4734 len += len_si[proc]; 4735 } 4736 } 4737 4738 /* determine the number and length of messages to receive for ij-structure */ 4739 /*-------------------------------------------------------------------------*/ 4740 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4741 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4742 4743 /* post the Irecv of j-structure */ 4744 
/*-------------------------------*/ 4745 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4746 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4747 4748 /* post the Isend of j-structure */ 4749 /*--------------------------------*/ 4750 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4751 4752 for (proc=0, k=0; proc<size; proc++) { 4753 if (!len_s[proc]) continue; 4754 i = owners[proc]; 4755 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4756 k++; 4757 } 4758 4759 /* receives and sends of j-structure are complete */ 4760 /*------------------------------------------------*/ 4761 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4762 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4763 4764 /* send and recv i-structure */ 4765 /*---------------------------*/ 4766 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4767 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4768 4769 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4770 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4771 for (proc=0,k=0; proc<size; proc++) { 4772 if (!len_s[proc]) continue; 4773 /* form outgoing message for i-structure: 4774 buf_si[0]: nrows to be sent 4775 [1:nrows]: row indices (local to the receiving process) 4776 [nrows+1:2*nrows+1]: i-structure (running sums of the row lengths) 4777 */ 4778 /*-------------------------------------------*/ 4779 nrows = len_si[proc]/2 - 1; 4780 buf_si_i = buf_si + nrows+1; 4781 buf_si[0] = nrows; 4782 buf_si_i[0] = 0; 4783 nrows = 0; 4784 for (i=owners[proc]; i<owners[proc+1]; i++) { 4785 anzi = ai[i+1] - ai[i]; 4786 if (anzi) { 4787 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4788 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4789 nrows++; 4790 } 4791 } 4792 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4793 k++; 4794 buf_si += len_si[proc]; 4795 } 4796 4797 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4798 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4799 4800 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4801 for (i=0; i<merge->nrecv; i++) { 4802 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4803 } 4804 4805 ierr = PetscFree(len_si);CHKERRQ(ierr); 4806 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4807 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4808 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4809 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4810 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4811 ierr = PetscFree(status);CHKERRQ(ierr); 4812 4813 /* compute a local seq matrix in each processor */ 4814 /*----------------------------------------------*/ 4815 /* allocate bi array and free space for accumulating nonzero column info */ 4816 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4817 bi[0] = 0; 4818 4819 /* create and initialize a linked list */ 4820 nlnk = N+1; 4821 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4822 4823 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4824 len = ai[owners[rank+1]] - ai[owners[rank]]; 4825 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4826 4827
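/*
   Added commentary: layout of one received i-structure in buf_ri[k], with hypothetical
   numbers (nothing here is executed). If a sender contributed two nonempty rows, locally
   numbered 3 and 7 on this process, holding 2 and 5 nonzeros respectively, then

      buf_ri[k] = [ 2,          number of rows in this message
                    3, 7,       local row numbers
                    0, 2, 7 ]   running sums indexing into the matching j- and a-buffers

   The loops below traverse these structures through buf_ri_k[], nextrow[] and nextai[].
*/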
current_space = free_space; 4828 4829 /* determine symbolic info for each local row */ 4830 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4831 4832 for (k=0; k<merge->nrecv; k++) { 4833 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4834 nrows = *buf_ri_k[k]; 4835 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4836 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */ 4837 } 4838 4839 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4840 len = 0; 4841 for (i=0; i<m; i++) { 4842 bnzi = 0; 4843 /* add local non-zero cols of this proc's seqmat into lnk */ 4844 arow = owners[rank] + i; 4845 anzi = ai[arow+1] - ai[arow]; 4846 aj = a->j + ai[arow]; 4847 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4848 bnzi += nlnk; 4849 /* add received col data into lnk */ 4850 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4851 if (i == *nextrow[k]) { /* i-th row */ 4852 anzi = *(nextai[k]+1) - *nextai[k]; 4853 aj = buf_rj[k] + *nextai[k]; 4854 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4855 bnzi += nlnk; 4856 nextrow[k]++; nextai[k]++; 4857 } 4858 } 4859 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4860 4861 /* if free space is not available, make more free space */ 4862 if (current_space->local_remaining<bnzi) { 4863 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4864 nspacedouble++; 4865 } 4866 /* copy data into free space, then initialize lnk */ 4867 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4868 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4869 4870 current_space->array += bnzi; 4871 current_space->local_used += bnzi; 4872 current_space->local_remaining -= bnzi; 4873 4874 bi[i+1] = bi[i] + bnzi; 4875 } 4876 4877 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4878 4879 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4880 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4881 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4882 4883 /* create symbolic parallel matrix B_mpi */ 4884 /*---------------------------------------*/ 4885 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4886 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4887 if (n==PETSC_DECIDE) { 4888 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4889 } else { 4890 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4891 } 4892 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4893 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4894 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4895 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4896 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4897 4898 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4899 B_mpi->assembled = PETSC_FALSE; 4900 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4901 merge->bi = bi; 4902 merge->bj = bj; 4903 merge->buf_ri = buf_ri; 4904 merge->buf_rj = buf_rj; 4905 merge->coi = NULL; 4906 merge->coj = NULL; 4907 merge->owners_co = NULL; 4908 4909 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4910 4911 /* attach the supporting struct to B_mpi for reuse */ 4912 ierr =
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4913 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4914 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4915 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4916 *mpimat = B_mpi; 4917 4918 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4919 PetscFunctionReturn(0); 4920 } 4921 4922 #undef __FUNCT__ 4923 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4924 /*@C 4925 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential 4926 matrices from each processor 4927 4928 Collective on MPI_Comm 4929 4930 Input Parameters: 4931 + comm - the communicator the parallel matrix will live on 4932 . seqmat - the input sequential matrix (one per process) 4933 . m - number of local rows (or PETSC_DECIDE) 4934 . n - number of local columns (or PETSC_DECIDE) 4935 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4936 4937 Output Parameter: 4938 . mpimat - the parallel matrix generated 4939 4940 Level: advanced 4941 4942 Notes: 4943 The dimensions of the sequential matrix in each processor MUST be the same. 4944 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4945 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4946 @*/ 4947 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4948 { 4949 PetscErrorCode ierr; 4950 PetscMPIInt size; 4951 4952 PetscFunctionBegin; 4953 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4954 if (size == 1) { 4955 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4956 if (scall == MAT_INITIAL_MATRIX) { 4957 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4958 } else { 4959 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4960 } 4961 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4962 PetscFunctionReturn(0); 4963 } 4964 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4965 if (scall == MAT_INITIAL_MATRIX) { 4966 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4967 } 4968 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4969 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4970 PetscFunctionReturn(0); 4971 } 4972 4973 #undef __FUNCT__ 4974 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4975 /*@ 4976 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4977 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4978 with MatGetSize(). 4979 4980 Not Collective 4981 4982 Input Parameters: 4983 + A - the matrix 4984 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4985 4986 Output Parameter: 4987 .
A_loc - the local sequential matrix generated 4988 4989 Level: developer 4990 4991 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4992 4993 @*/ 4994 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4995 { 4996 PetscErrorCode ierr; 4997 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4998 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4999 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5000 MatScalar *aa=a->a,*ba=b->a,*cam; 5001 PetscScalar *ca; 5002 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5003 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5004 PetscBool match; 5005 5006 PetscFunctionBegin; 5007 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5008 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5009 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5010 if (scall == MAT_INITIAL_MATRIX) { 5011 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5012 ci[0] = 0; 5013 for (i=0; i<am; i++) { 5014 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5015 } 5016 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5017 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5018 k = 0; 5019 for (i=0; i<am; i++) { 5020 ncols_o = bi[i+1] - bi[i]; 5021 ncols_d = ai[i+1] - ai[i]; 5022 /* off-diagonal portion of A, global cols < cstart */ 5023 for (jo=0; jo<ncols_o; jo++) { 5024 col = cmap[*bj]; 5025 if (col >= cstart) break; 5026 cj[k] = col; bj++; 5027 ca[k++] = *ba++; 5028 } 5029 /* diagonal portion of A */ 5030 for (j=0; j<ncols_d; j++) { 5031 cj[k] = cstart + *aj++; 5032 ca[k++] = *aa++; 5033 } 5034 /* remaining off-diagonal portion of A, global cols >= cstart */ 5035 for (j=jo; j<ncols_o; j++) { 5036 cj[k] = cmap[*bj++]; 5037 ca[k++] = *ba++; 5038 } 5039 } 5040 /* put together the new matrix */ 5041 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5042 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5043 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5044 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5045 mat->free_a = PETSC_TRUE; 5046 mat->free_ij = PETSC_TRUE; 5047 mat->nonew = 0; 5048 } else if (scall == MAT_REUSE_MATRIX) { 5049 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5050 ci = mat->i; cj = mat->j; cam = mat->a; 5051 for (i=0; i<am; i++) { 5052 /* off-diagonal portion of A, global cols < cstart */ 5053 ncols_o = bi[i+1] - bi[i]; 5054 for (jo=0; jo<ncols_o; jo++) { 5055 col = cmap[*bj]; 5056 if (col >= cstart) break; 5057 *cam++ = *ba++; bj++; 5058 } 5059 /* diagonal portion of A */ 5060 ncols_d = ai[i+1] - ai[i]; 5061 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5062 /* remaining off-diagonal portion of A, global cols >= cstart */ 5063 for (j=jo; j<ncols_o; j++) { 5064 *cam++ = *ba++; bj++; 5065 } 5066 } 5067 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5068 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5069 PetscFunctionReturn(0); 5070 } 5071 5072 #undef __FUNCT__ 5073 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5074 /*@C 5075 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5076 5077 Not Collective 5078 5079 Input Parameters: 5080 + A - the matrix 5081 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5082 - row, col - index sets of rows and columns to extract (or NULL) 5083 5084 Output Parameter: 5085 .
A_loc - the local sequential matrix generated 5086 5087 Level: developer 5088 5089 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5090 5091 @*/ 5092 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5093 { 5094 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5095 PetscErrorCode ierr; 5096 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5097 IS isrowa,iscola; 5098 Mat *aloc; 5099 PetscBool match; 5100 5101 PetscFunctionBegin; 5102 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5103 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5104 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5105 if (!row) { 5106 start = A->rmap->rstart; end = A->rmap->rend; 5107 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5108 } else { 5109 isrowa = *row; 5110 } 5111 if (!col) { 5112 start = A->cmap->rstart; 5113 cmap = a->garray; 5114 nzA = a->A->cmap->n; 5115 nzB = a->B->cmap->n; 5116 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5117 ncols = 0; 5118 for (i=0; i<nzB; i++) { 5119 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5120 else break; 5121 } 5122 imark = i; 5123 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5124 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5125 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5126 } else { 5127 iscola = *col; 5128 } 5129 if (scall != MAT_INITIAL_MATRIX) { 5130 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5131 aloc[0] = *A_loc; 5132 } 5133 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5134 *A_loc = aloc[0]; 5135 ierr = PetscFree(aloc);CHKERRQ(ierr); 5136 if (!row) { 5137 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5138 } 5139 if (!col) { 5140 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5141 } 5142 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5143 PetscFunctionReturn(0); 5144 } 5145 5146 #undef __FUNCT__ 5147 #define __FUNCT__ "MatGetBrowsOfAcols" 5148 /*@C 5149 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that correspond to the nonzero columns of local A 5150 5151 Collective on Mat 5152 5153 Input Parameters: 5154 + A,B - the matrices in mpiaij format 5155 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5156 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5157 5158 Output Parameters: 5159 + rowb, colb - index sets of rows and columns of B to extract 5160 - B_seq - the sequential matrix generated 5161 5162 Level: developer 5163 5164 @*/ 5165 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5166 { 5167 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5168 PetscErrorCode ierr; 5169 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5170 IS isrowb,iscolb; 5171 Mat *bseq=NULL; 5172 5173 PetscFunctionBegin; 5174 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5175 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5176 } 5177 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5178 5179 if (scall == MAT_INITIAL_MATRIX) { 5180 start = A->cmap->rstart; 5181 cmap = a->garray; 5182 nzA = a->A->cmap->n; 5183 nzB = a->B->cmap->n; 5184 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5185 ncols = 0; 5186 for (i=0; i<nzB; i++) { /* row < local row index */ 5187 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5188 else break; 5189 } 5190 imark = i; 5191 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5192 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5193 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5194 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5195 } else { 5196 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5197 isrowb = *rowb; iscolb = *colb; 5198 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5199 bseq[0] = *B_seq; 5200 } 5201 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5202 *B_seq = bseq[0]; 5203 ierr = PetscFree(bseq);CHKERRQ(ierr); 5204 if (!rowb) { 5205 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5206 } else { 5207 *rowb = isrowb; 5208 } 5209 if (!colb) { 5210 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5211 } else { 5212 *colb = iscolb; 5213 } 5214 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5215 PetscFunctionReturn(0); 5216 } 5217 5218 #undef __FUNCT__ 5219 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5220 /* 5221 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that correspond to the nonzero columns 5222 of the OFF-DIAGONAL portion of local A 5223 5224 Collective on Mat 5225 5226 Input Parameters: 5227 + A,B - the matrices in mpiaij format 5228 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5229 5230 Output Parameters: 5231 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5232 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5233 .
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5234 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5235 5236 Level: developer 5237 5238 */ 5239 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5240 { 5241 VecScatter_MPI_General *gen_to,*gen_from; 5242 PetscErrorCode ierr; 5243 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5244 Mat_SeqAIJ *b_oth; 5245 VecScatter ctx =a->Mvctx; 5246 MPI_Comm comm; 5247 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5248 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5249 PetscScalar *rvalues,*svalues; 5250 MatScalar *b_otha,*bufa,*bufA; 5251 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5252 MPI_Request *rwaits = NULL,*swaits = NULL; 5253 MPI_Status *sstatus,rstatus; 5254 PetscMPIInt jj; 5255 PetscInt *cols,sbs,rbs; 5256 PetscScalar *vals; 5257 5258 PetscFunctionBegin; 5259 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5260 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5261 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5262 } 5263 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5264 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5265 5266 gen_to = (VecScatter_MPI_General*)ctx->todata; 5267 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5268 rvalues = gen_from->values; /* holds the length of receiving row */ 5269 svalues = gen_to->values; /* holds the length of sending row */ 5270 nrecvs = gen_from->n; 5271 nsends = gen_to->n; 5272 5273 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5274 srow = gen_to->indices; /* local row index to be sent */ 5275 sstarts = gen_to->starts; 5276 sprocs = gen_to->procs; 5277 sstatus = gen_to->sstatus; 5278 sbs = gen_to->bs; 5279 rstarts = gen_from->starts; 5280 rprocs = gen_from->procs; 5281 rbs = gen_from->bs; 5282 5283 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5284 if (scall == MAT_INITIAL_MATRIX) { 5285 /* i-array */ 5286 /*---------*/ 5287 /* post receives */ 5288 for (i=0; i<nrecvs; i++) { 5289 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5290 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5291 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5292 } 5293 5294 /* pack the outgoing message */ 5295 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5296 5297 sstartsj[0] = 0; 5298 rstartsj[0] = 0; 5299 len = 0; /* total length of j or a array to be sent */ 5300 k = 0; 5301 for (i=0; i<nsends; i++) { 5302 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5303 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5304 for (j=0; j<nrows; j++) { 5305 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5306 for (l=0; l<sbs; l++) { 5307 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5308 5309 rowlen[j*sbs+l] = ncols; 5310 5311 len += ncols; 5312 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5313 } 5314 k++; 5315 } 5316 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5317 5318 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in 
bufj and bufa */ 5319 } 5320 /* recvs and sends of i-array are completed */ 5321 i = nrecvs; 5322 while (i--) { 5323 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5324 } 5325 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5326 5327 /* allocate buffers for sending j and a arrays */ 5328 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5329 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5330 5331 /* create i-array of B_oth */ 5332 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5333 5334 b_othi[0] = 0; 5335 len = 0; /* total length of j or a array to be received */ 5336 k = 0; 5337 for (i=0; i<nrecvs; i++) { 5338 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5339 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5340 for (j=0; j<nrows; j++) { 5341 b_othi[k+1] = b_othi[k] + rowlen[j]; 5342 len += rowlen[j]; k++; 5343 } 5344 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5345 } 5346 5347 /* allocate space for j and a arrays of B_oth */ 5348 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5349 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5350 5351 /* j-array */ 5352 /*---------*/ 5353 /* post receives of j-array */ 5354 for (i=0; i<nrecvs; i++) { 5355 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5356 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5357 } 5358 5359 /* pack the outgoing message j-array */ 5360 k = 0; 5361 for (i=0; i<nsends; i++) { 5362 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5363 bufJ = bufj+sstartsj[i]; 5364 for (j=0; j<nrows; j++) { 5365 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5366 for (ll=0; ll<sbs; ll++) { 5367 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5368 for (l=0; l<ncols; l++) { 5369 *bufJ++ = cols[l]; 5370 } 5371 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5372 } 5373 } 5374 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5375 } 5376 5377 /* recvs and sends of j-array are completed */ 5378 i = nrecvs; 5379 while (i--) { 5380 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5381 } 5382 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5383 } else if (scall == MAT_REUSE_MATRIX) { 5384 sstartsj = *startsj_s; 5385 rstartsj = *startsj_r; 5386 bufa = *bufa_ptr; 5387 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5388 b_otha = b_oth->a; 5389 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse, must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX"); 5390 5391 /* a-array */ 5392 /*---------*/ 5393 /* post receives of a-array */ 5394 for (i=0; i<nrecvs; i++) { 5395 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5396 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5397 } 5398 5399 /* pack the outgoing message a-array */ 5400 k = 0; 5401 for (i=0; i<nsends; i++) { 5402 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5403 bufA = bufa+sstartsj[i]; 5404 for (j=0; j<nrows; j++) { 5405 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5406 for (ll=0; ll<sbs; ll++) { 5407 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5408 for (l=0; l<ncols; l++) { 5409 *bufA++ = vals[l]; 5410 } 5411 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5412 } 5413 } 5414 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5415 } 5416 /* recvs and sends of a-array are completed */ 5417 i = nrecvs; 5418 while (i--) { 5419 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5420 } 5421 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5422 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5423 5424 if (scall == MAT_INITIAL_MATRIX) { 5425 /* put together the new matrix */ 5426 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5427 5428 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5429 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5430 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5431 b_oth->free_a = PETSC_TRUE; 5432 b_oth->free_ij = PETSC_TRUE; 5433 b_oth->nonew = 0; 5434 5435 ierr = PetscFree(bufj);CHKERRQ(ierr); 5436 if (!startsj_s || !bufa_ptr) { 5437 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5438 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5439 } else { 5440 *startsj_s = sstartsj; 5441 *startsj_r = rstartsj; 5442 *bufa_ptr = bufa; 5443 } 5444 } 5445 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5446 PetscFunctionReturn(0); 5447 } 5448 5449 #undef __FUNCT__ 5450 #define __FUNCT__ "MatGetCommunicationStructs" 5451 /*@C 5452 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5453 5454 Not Collective 5455 5456 Input Parameters: 5457 . A - The matrix in mpiaij format 5458 5459 Output Parameter: 5460 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5461 . 
colmap - A map from global column index to local index into lvec 5462 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5463 5464 Level: developer 5465 5466 @*/ 5467 #if defined(PETSC_USE_CTABLE) 5468 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5469 #else 5470 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5471 #endif 5472 { 5473 Mat_MPIAIJ *a; 5474 5475 PetscFunctionBegin; 5476 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5477 PetscValidPointer(lvec, 2); 5478 PetscValidPointer(colmap, 3); 5479 PetscValidPointer(multScatter, 4); 5480 a = (Mat_MPIAIJ*) A->data; 5481 if (lvec) *lvec = a->lvec; 5482 if (colmap) *colmap = a->colmap; 5483 if (multScatter) *multScatter = a->Mvctx; 5484 PetscFunctionReturn(0); 5485 } 5486 5487 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5488 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5489 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5490 5491 #undef __FUNCT__ 5492 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5493 /* 5494 Computes C = (B'*A')' since computing A*B directly with a parallel dense A is untenable 5495 5496 n p p 5497 ( ) ( ) ( ) 5498 m ( A ) * n ( B ) = m ( C ) 5499 ( ) ( ) ( ) 5500 5501 */ 5502 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5503 { 5504 PetscErrorCode ierr; 5505 Mat At,Bt,Ct; 5506 5507 PetscFunctionBegin; 5508 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5509 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5510 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5511 ierr = MatDestroy(&At);CHKERRQ(ierr); 5512 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5513 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5514 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5515 PetscFunctionReturn(0); 5516 } 5517 5518 #undef __FUNCT__ 5519 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5520 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5521 { 5522 PetscErrorCode ierr; 5523 PetscInt m=A->rmap->n,n=B->cmap->n; 5524 Mat Cmat; 5525 5526 PetscFunctionBegin; 5527 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5528 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5529 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5530 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5531 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5532 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5533 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5534 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5535 5536 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5537 5538 *C = Cmat; 5539 PetscFunctionReturn(0); 5540 } 5541 5542 /* ----------------------------------------------------------------*/ 5543 #undef __FUNCT__ 5544 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5545 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5546 { 5547 PetscErrorCode ierr; 5548 5549 PetscFunctionBegin; 5550 if (scall == MAT_INITIAL_MATRIX) { 5551 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5552 } 5553 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5554
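  /*
     Added commentary: this kernel is normally reached through the generic interface, e.g.
     (illustrative only)

       ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

     with A of type MATMPIDENSE and B of type MATMPIAIJ, which dispatches here via the
     "MatMatMult_mpidense_mpiaij_C" function composed in MatCreate_MPIAIJ() below.
  */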
PetscFunctionReturn(0); 5555 } 5556 5557 #if defined(PETSC_HAVE_MUMPS) 5558 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5559 #endif 5560 #if defined(PETSC_HAVE_PASTIX) 5561 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5562 #endif 5563 #if defined(PETSC_HAVE_SUPERLU_DIST) 5564 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5565 #endif 5566 #if defined(PETSC_HAVE_CLIQUE) 5567 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5568 #endif 5569 5570 /*MC 5571 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5572 5573 Options Database Keys: 5574 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5575 5576 Level: beginner 5577 5578 .seealso: MatCreateAIJ() 5579 M*/ 5580 5581 #undef __FUNCT__ 5582 #define __FUNCT__ "MatCreate_MPIAIJ" 5583 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5584 { 5585 Mat_MPIAIJ *b; 5586 PetscErrorCode ierr; 5587 PetscMPIInt size; 5588 5589 PetscFunctionBegin; 5590 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5591 5592 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5593 B->data = (void*)b; 5594 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5595 B->assembled = PETSC_FALSE; 5596 B->insertmode = NOT_SET_VALUES; 5597 b->size = size; 5598 5599 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5600 5601 /* build cache for off array entries formed */ 5602 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5603 5604 b->donotstash = PETSC_FALSE; 5605 b->colmap = 0; 5606 b->garray = 0; 5607 b->roworiented = PETSC_TRUE; 5608 5609 /* stuff used for matrix vector multiply */ 5610 b->lvec = NULL; 5611 b->Mvctx = NULL; 5612 5613 /* stuff for MatGetRow() */ 5614 b->rowindices = 0; 5615 b->rowvalues = 0; 5616 b->getrowactive = PETSC_FALSE; 5617 5618 /* flexible pointer used in CUSP/CUSPARSE classes */ 5619 b->spptr = NULL; 5620 5621 #if defined(PETSC_HAVE_MUMPS) 5622 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C","MatGetFactor_aij_mumps",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5623 #endif 5624 #if defined(PETSC_HAVE_PASTIX) 5625 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C","MatGetFactor_mpiaij_pastix",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5626 #endif 5627 #if defined(PETSC_HAVE_SUPERLU_DIST) 5628 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C","MatGetFactor_mpiaij_superlu_dist",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5629 #endif 5630 #if defined(PETSC_HAVE_CLIQUE) 5631 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C","MatGetFactor_aij_clique",MatGetFactor_aij_clique);CHKERRQ(ierr); 5632 #endif 5633 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIAIJ",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5634 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIAIJ",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5635 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIAIJ",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5636 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_MPIAIJ",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5637 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C","MatMPIAIJSetPreallocation_MPIAIJ",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5638 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C","MatMPIAIJSetPreallocationCSR_MPIAIJ",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5639 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIAIJ",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5640 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C","MatConvert_MPIAIJ_MPIAIJPERM",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5641 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C","MatConvert_MPIAIJ_MPIAIJCRL",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C","MatConvert_MPIAIJ_MPISBAIJ",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5643 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C","MatMatMult_MPIDense_MPIAIJ",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5644 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C","MatMatMultSymbolic_MPIDense_MPIAIJ",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5645 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C","MatMatMultNumeric_MPIDense_MPIAIJ",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5646 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5647 PetscFunctionReturn(0); 5648 } 5649 5650 #undef __FUNCT__ 5651 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5652 /*@ 5653 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5654 and "off-diagonal" part of the matrix in CSR format. 5655 5656 Collective on MPI_Comm 5657 5658 Input Parameters: 5659 + comm - MPI communicator 5660 . m - number of local rows (Cannot be PETSC_DECIDE) 5661 . n - This value should be the same as the local size used in creating the 5662 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5663 it calculated if N is given) For square matrices n is almost always m. 5664 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5665 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5666 . i - row indices for "diagonal" portion of matrix 5667 . j - column indices, local to the "diagonal" block 5668 . a - matrix values 5669 . oi - row indices for "off-diagonal" portion of matrix 5670 . oj - column indices, global (see the illustration in the notes below) 5671 - oa - matrix values 5672 5673 Output Parameter: 5674 . mat - the matrix 5675 5676 Level: advanced 5677 5678 Notes: 5679 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5680 must free the arrays once the matrix has been destroyed and not before. 5681 5682 The i and j indices are 0 based 5683 5684 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5685 5686 This sets local rows and cannot be used to set off-processor values. 5687 5688 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5689 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5690 not easily support in-place reassembly.
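   For illustration of the expected layout only (hypothetical values, not taken from an actual example): a process
   whose "diagonal" block covers global columns 0 and 1 and that owns a single row with entries 1.0 and 2.0 in those
   columns plus 3.0 in global column 5 would pass i = {0,2}, j = {0,1}, a = {1.0,2.0} and oi = {0,1}, oj = {5},
   oa = {3.0}.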
It is recommended to use MatSetValues() (or a variant thereof) because 5691 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5692 keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5693 communication if it is known that only local entries will be set. 5694 5695 .keywords: matrix, aij, compressed row, sparse, parallel 5696 5697 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5698 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5699 @*/ 5700 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5701 { 5702 PetscErrorCode ierr; 5703 Mat_MPIAIJ *maij; 5704 5705 PetscFunctionBegin; 5706 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5707 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5708 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5709 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5710 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5711 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5712 maij = (Mat_MPIAIJ*) (*mat)->data; 5713 5714 (*mat)->preallocated = PETSC_TRUE; 5715 5716 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5717 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5718 5719 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5720 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5721 5722 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5723 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5724 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5725 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5726 5727 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5728 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5729 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5730 PetscFunctionReturn(0); 5731 } 5732 5733 /* 5734 Special version for direct calls from Fortran 5735 */ 5736 #include <petsc-private/fortranimpl.h> 5737 5738 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5739 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5740 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5741 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5742 #endif 5743 5744 /* Change these macros so they can be used in a void function */ 5745 #undef CHKERRQ 5746 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5747 #undef SETERRQ2 5748 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5749 #undef SETERRQ3 5750 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5751 #undef SETERRQ 5752 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5753 5754 #undef __FUNCT__ 5755 #define __FUNCT__ "matsetvaluesmpiaij_" 5756 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5757 { 5758 Mat mat = *mmat; 5759 PetscInt m = *mm, n = *mn; 5760 InsertMode addv = *maddv; 5761 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5762 PetscScalar value; 5763 PetscErrorCode ierr;
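  /*
     Note (added commentary): this wrapper returns void so that it can be called directly from
     Fortran; the CHKERRQ/SETERRQ macros redefined above therefore abort via CHKERRABORT()
     instead of returning an error code. The body below follows the same logic as
     MatSetValues_MPIAIJ().
  */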
5764 5765 MatCheckPreallocated(mat,1); 5766 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5767 5768 #if defined(PETSC_USE_DEBUG) 5769 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5770 #endif 5771 { 5772 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5773 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5774 PetscBool roworiented = aij->roworiented; 5775 5776 /* Some Variables required in the macro */ 5777 Mat A = aij->A; 5778 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5779 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5780 MatScalar *aa = a->a; 5781 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5782 Mat B = aij->B; 5783 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5784 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5785 MatScalar *ba = b->a; 5786 5787 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5788 PetscInt nonew = a->nonew; 5789 MatScalar *ap1,*ap2; 5790 5791 PetscFunctionBegin; 5792 for (i=0; i<m; i++) { 5793 if (im[i] < 0) continue; 5794 #if defined(PETSC_USE_DEBUG) 5795 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5796 #endif 5797 if (im[i] >= rstart && im[i] < rend) { 5798 row = im[i] - rstart; 5799 lastcol1 = -1; 5800 rp1 = aj + ai[row]; 5801 ap1 = aa + ai[row]; 5802 rmax1 = aimax[row]; 5803 nrow1 = ailen[row]; 5804 low1 = 0; 5805 high1 = nrow1; 5806 lastcol2 = -1; 5807 rp2 = bj + bi[row]; 5808 ap2 = ba + bi[row]; 5809 rmax2 = bimax[row]; 5810 nrow2 = bilen[row]; 5811 low2 = 0; 5812 high2 = nrow2; 5813 5814 for (j=0; j<n; j++) { 5815 if (roworiented) value = v[i*n+j]; 5816 else value = v[i+j*m]; 5817 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5818 if (in[j] >= cstart && in[j] < cend) { 5819 col = in[j] - cstart; 5820 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5821 } else if (in[j] < 0) continue; 5822 #if defined(PETSC_USE_DEBUG) 5823 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5824 #endif 5825 else { 5826 if (mat->was_assembled) { 5827 if (!aij->colmap) { 5828 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5829 } 5830 #if defined(PETSC_USE_CTABLE) 5831 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5832 col--; 5833 #else 5834 col = aij->colmap[in[j]] - 1; 5835 #endif 5836 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5837 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5838 col = in[j]; 5839 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5840 B = aij->B; 5841 b = (Mat_SeqAIJ*)B->data; 5842 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5843 rp2 = bj + bi[row]; 5844 ap2 = ba + bi[row]; 5845 rmax2 = bimax[row]; 5846 nrow2 = bilen[row]; 5847 low2 = 0; 5848 high2 = nrow2; 5849 bm = aij->B->rmap->n; 5850 ba = b->a; 5851 } 5852 } else col = in[j]; 5853 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5854 } 5855 } 5856 } else if (!aij->donotstash) { 5857 if (roworiented) { 5858 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5859 } else { 5860 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5861 } 5862 } 5863 } 5864 } 5865 PetscFunctionReturnVoid(); 5866 } 5867 5868
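/*
   Illustrative sketch (not part of the library source): the MatSetValues()-based assembly
   recommended in the notes of MatCreateMPIAIJWithSplitArrays() above. Sizes and values are
   hypothetical.

     Mat            A;
     PetscInt       i,rstart,rend;
     PetscScalar    one = 1.0;
     PetscErrorCode ierr;

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,3,NULL,2,NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/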