1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 138 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 139 if (type == NORM_2) { 140 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 141 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 142 } 143 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 144 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 145 } 146 } else if (type == NORM_1) { 147 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 148 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 149 } 150 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 151 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 152 } 153 } else if (type == NORM_INFINITY) { 154 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 155 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 156 } 157 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 158 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 159 } 160 161 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 162 if (type == NORM_INFINITY) { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 164 } else { 165 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 166 } 167 ierr = PetscFree(work);CHKERRQ(ierr); 168 if (type == NORM_2) { 169 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "MatDistribute_MPIAIJ" 176 /* 177 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 178 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 179 180 Only for square matrices 181 182 Used by a preconditioner, hence PETSC_EXTERN 183 */ 184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 185 { 186 PetscMPIInt rank,size; 187 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 188 PetscErrorCode ierr; 189 Mat mat; 190 Mat_SeqAIJ *gmata; 191 PetscMPIInt tag; 192 MPI_Status status; 193 PetscBool aij; 194 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 195 196 PetscFunctionBegin; 197 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 199 if (!rank) { 200 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 201 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 202 } 203 if (reuse == MAT_INITIAL_MATRIX) { 204 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 205 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 206 if (!rank) { 207 bses[0] = gmat->rmap->bs; 208 bses[1] = gmat->cmap->bs; 209 } 210 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 211 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 212 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 213 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 214 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 215 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 216 217 rowners[0] = 0; 218 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 219 rstart = rowners[rank]; 220 rend = rowners[rank+1]; 221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 222 if (!rank) { 223 gmata = (Mat_SeqAIJ*) gmat->data; 224 /* send row lengths to all processors */ 225 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 226 for (i=1; i<size; i++) { 227 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 228 } 229 /* determine number diagonal and off-diagonal counts */ 230 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 231 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 232 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 233 jj = 0; 234 for (i=0; i<m; i++) { 235 for (j=0; j<dlens[i]; j++) { 236 if (gmata->j[jj] < rstart) ld[i]++; 237 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 238 jj++; 239 } 240 } 241 /* send column indices to other processes */ 242 for (i=1; i<size; i++) { 243 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 244 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 246 } 247 248 /* send numerical values to other processes */ 249 for (i=1; i<size; i++) { 250 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 251 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 252 } 253 gmataa = gmata->a; 254 gmataj = gmata->j; 255 256 } else { 257 /* receive row lengths */ 258 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 /* receive column indices */ 260 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 262 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 263 /* determine number diagonal and off-diagonal counts */ 264 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 266 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 267 jj = 0; 268 for (i=0; i<m; i++) { 269 for (j=0; j<dlens[i]; j++) { 270 if (gmataj[jj] < rstart) ld[i]++; 271 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 272 jj++; 273 } 274 } 275 /* receive numerical values */ 276 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 277 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 278 } 279 /* set preallocation */ 280 for (i=0; i<m; i++) { 281 dlens[i] -= olens[i]; 282 } 283 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 284 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 285 286 for (i=0; i<m; i++) { 287 dlens[i] += olens[i]; 288 } 289 cnt = 0; 290 for (i=0; i<m; i++) { 291 row = rstart + i; 292 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 293 cnt += dlens[i]; 294 } 295 if (rank) { 296 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 297 } 298 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 299 ierr = PetscFree(rowners);CHKERRQ(ierr); 300 301 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 302 303 *inmat = mat; 304 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 307 mat = *inmat; 308 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 309 if (!rank) { 310 /* send numerical values to other processes */ 311 gmata = (Mat_SeqAIJ*) gmat->data; 312 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 313 gmataa = gmata->a; 314 for (i=1; i<size; i++) { 315 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 316 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 317 } 318 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 319 } else { 320 /* receive numerical values from process 0*/ 321 nz = Ad->nz + Ao->nz; 322 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 326 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 327 ad = Ad->a; 328 ao = Ao->a; 329 if (mat->rmap->n) { 330 i = 0; 331 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 for (i=1; i<mat->rmap->n; i++) { 335 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 336 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 337 } 338 i--; 339 if (mat->rmap->n) { 340 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 341 } 342 if (rank) { 343 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 344 } 345 } 346 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 /* 352 Local utility routine that creates a mapping from the global column 353 number to the local number in the off-diagonal part of the local 354 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 355 a slightly higher hash table cost; without it it is not scalable (each processor 356 has an order N integer array but is fast to acess. 357 */ 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 361 { 362 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 363 PetscErrorCode ierr; 364 PetscInt n = aij->B->cmap->n,i; 365 366 PetscFunctionBegin; 367 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 368 #if defined(PETSC_USE_CTABLE) 369 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 370 for (i=0; i<n; i++) { 371 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 372 } 373 #else 374 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 375 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 376 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 377 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 378 #endif 379 PetscFunctionReturn(0); 380 } 381 382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 383 { \ 384 if (col <= lastcol1) low1 = 0; \ 385 else high1 = nrow1; \ 386 lastcol1 = col;\ 387 while (high1-low1 > 5) { \ 388 t = (low1+high1)/2; \ 389 if (rp1[t] > col) high1 = t; \ 390 else low1 = t; \ 391 } \ 392 for (_i=low1; _i<high1; _i++) { \ 393 if (rp1[_i] > col) break; \ 394 if (rp1[_i] == col) { \ 395 if (addv == ADD_VALUES) ap1[_i] += value; \ 396 else ap1[_i] = value; \ 397 goto a_noinsert; \ 398 } \ 399 } \ 400 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 401 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 402 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 403 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 404 N = nrow1++ - 1; a->nz++; high1++; \ 405 /* shift up all the later entries in this row */ \ 406 for (ii=N; ii>=_i; ii--) { \ 407 rp1[ii+1] = rp1[ii]; \ 408 ap1[ii+1] = ap1[ii]; \ 409 } \ 410 rp1[_i] = col; \ 411 ap1[_i] = value; \ 412 a_noinsert: ; \ 413 ailen[row] = nrow1; \ 414 } 415 416 417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 418 { \ 419 if (col <= lastcol2) low2 = 0; \ 420 else high2 = nrow2; \ 421 lastcol2 = col; \ 422 while (high2-low2 > 5) { \ 423 t = (low2+high2)/2; \ 424 if (rp2[t] > col) high2 = t; \ 425 else low2 = t; \ 426 } \ 427 for (_i=low2; _i<high2; _i++) { \ 428 if (rp2[_i] > col) break; \ 429 if (rp2[_i] == col) { \ 430 if (addv == ADD_VALUES) ap2[_i] += value; \ 431 else ap2[_i] = value; \ 432 goto b_noinsert; \ 433 } \ 434 } \ 435 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 436 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 437 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 438 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 439 N = nrow2++ - 1; b->nz++; high2++; \ 440 /* shift up all the later entries in this row */ \ 441 for (ii=N; ii>=_i; ii--) { \ 442 rp2[ii+1] = rp2[ii]; \ 443 ap2[ii+1] = ap2[ii]; \ 444 } \ 445 rp2[_i] = col; \ 446 ap2[_i] = value; \ 447 b_noinsert: ; \ 448 bilen[row] = nrow2; \ 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 454 { 455 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 456 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 457 PetscErrorCode ierr; 458 PetscInt l,*garray = mat->garray,diag; 459 460 PetscFunctionBegin; 461 /* code only works for square matrices A */ 462 463 /* find size of row to the left of the diagonal part */ 464 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 465 row = row - diag; 466 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 467 if (garray[b->j[b->i[row]+l]] > diag) break; 468 } 469 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 470 471 /* diagonal part */ 472 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 473 474 /* right of diagonal part */ 475 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatSetValues_MPIAIJ" 481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 PetscScalar value; 485 PetscErrorCode ierr; 486 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 487 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 488 PetscBool roworiented = aij->roworiented; 489 490 /* Some Variables required in the macro */ 491 Mat A = aij->A; 492 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 493 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 494 MatScalar *aa = a->a; 495 PetscBool ignorezeroentries = a->ignorezeroentries; 496 Mat B = aij->B; 497 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 498 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 499 MatScalar *ba = b->a; 500 501 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 502 PetscInt nonew; 503 MatScalar *ap1,*ap2; 504 505 PetscFunctionBegin; 506 if (v) PetscValidScalarPointer(v,6); 507 for (i=0; i<m; i++) { 508 if (im[i] < 0) continue; 509 #if defined(PETSC_USE_DEBUG) 510 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 511 #endif 512 if (im[i] >= rstart && im[i] < rend) { 513 row = im[i] - rstart; 514 lastcol1 = -1; 515 rp1 = aj + ai[row]; 516 ap1 = aa + ai[row]; 517 rmax1 = aimax[row]; 518 nrow1 = ailen[row]; 519 low1 = 0; 520 high1 = nrow1; 521 lastcol2 = -1; 522 rp2 = bj + bi[row]; 523 ap2 = ba + bi[row]; 524 rmax2 = bimax[row]; 525 nrow2 = bilen[row]; 526 low2 = 0; 527 high2 = nrow2; 528 529 for (j=0; j<n; j++) { 530 if (v) { 531 if (roworiented) value = v[i*n+j]; 532 else value = v[i+j*m]; 533 } else value = 0.0; 534 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 535 if (in[j] >= cstart && in[j] < cend) { 536 col = in[j] - cstart; 537 nonew = a->nonew; 538 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 539 } else if (in[j] < 0) continue; 540 #if defined(PETSC_USE_DEBUG) 541 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 542 #endif 543 else { 544 if (mat->was_assembled) { 545 if (!aij->colmap) { 546 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 547 } 548 #if defined(PETSC_USE_CTABLE) 549 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 550 col--; 551 #else 552 col = aij->colmap[in[j]] - 1; 553 #endif 554 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 555 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 556 col = in[j]; 557 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 558 B = aij->B; 559 b = (Mat_SeqAIJ*)B->data; 560 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 561 rp2 = bj + bi[row]; 562 ap2 = ba + bi[row]; 563 rmax2 = bimax[row]; 564 nrow2 = bilen[row]; 565 low2 = 0; 566 high2 = nrow2; 567 bm = aij->B->rmap->n; 568 ba = b->a; 569 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 570 } else col = in[j]; 571 nonew = b->nonew; 572 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 573 } 574 } 575 } else { 576 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 577 if (!aij->donotstash) { 578 mat->assembled = PETSC_FALSE; 579 if (roworiented) { 580 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 581 } else { 582 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 583 } 584 } 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatGetValues_MPIAIJ" 592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 593 { 594 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 595 PetscErrorCode ierr; 596 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 597 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 598 599 PetscFunctionBegin; 600 for (i=0; i<m; i++) { 601 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 602 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 603 if (idxm[i] >= rstart && idxm[i] < rend) { 604 row = idxm[i] - rstart; 605 for (j=0; j<n; j++) { 606 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 607 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 608 if (idxn[j] >= cstart && idxn[j] < cend) { 609 col = idxn[j] - cstart; 610 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 611 } else { 612 if (!aij->colmap) { 613 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 614 } 615 #if defined(PETSC_USE_CTABLE) 616 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 617 col--; 618 #else 619 col = aij->colmap[idxn[j]] - 1; 620 #endif 621 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 622 else { 623 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 624 } 625 } 626 } 627 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 628 } 629 PetscFunctionReturn(0); 630 } 631 632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt nstash,reallocs; 641 InsertMode addv; 642 643 PetscFunctionBegin; 644 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 645 646 /* make sure all processors are either in INSERTMODE or ADDMODE */ 647 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 648 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 649 mat->insertmode = addv; /* in case this processor had no cache */ 650 651 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 652 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 653 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 660 { 661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 663 PetscErrorCode ierr; 664 PetscMPIInt n; 665 PetscInt i,j,rstart,ncols,flg; 666 PetscInt *row,*col; 667 PetscBool other_disassembled; 668 PetscScalar *val; 669 InsertMode addv = mat->insertmode; 670 671 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 672 673 PetscFunctionBegin; 674 if (!aij->donotstash && !mat->nooffprocentries) { 675 while (1) { 676 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 677 if (!flg) break; 678 679 for (i=0; i<n; ) { 680 /* Now identify the consecutive vals belonging to the same row */ 681 for (j=i,rstart=row[j]; j<n; j++) { 682 if (row[j] != rstart) break; 683 } 684 if (j < n) ncols = j-i; 685 else ncols = n-i; 686 /* Now assemble all these values with a single function call */ 687 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 688 689 i = j; 690 } 691 } 692 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 693 } 694 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 695 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 696 697 /* determine if any processor has disassembled, if so we must 698 also disassemble ourselfs, in order that we may reassemble. */ 699 /* 700 if nonzero structure of submatrix B cannot change then we know that 701 no processor disassembled thus we can skip this stuff 702 */ 703 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 704 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 705 if (mat->was_assembled && !other_disassembled) { 706 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 } 709 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 710 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 711 } 712 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 713 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 714 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 715 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 716 717 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 718 719 aij->rowvalues = 0; 720 721 /* used by MatAXPY() */ 722 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 723 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 724 725 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 726 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 733 { 734 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 739 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatZeroRows_MPIAIJ" 745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 746 { 747 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 748 PetscErrorCode ierr; 749 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 750 PetscInt i,*owners = A->rmap->range; 751 PetscInt *nprocs,j,idx,nsends,row; 752 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 753 PetscInt *rvalues,count,base,slen,*source; 754 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 755 MPI_Comm comm; 756 MPI_Request *send_waits,*recv_waits; 757 MPI_Status recv_status,*send_status; 758 const PetscScalar *xx; 759 PetscScalar *bb; 760 #if defined(PETSC_DEBUG) 761 PetscBool found = PETSC_FALSE; 762 #endif 763 764 PetscFunctionBegin; 765 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 766 /* first count number of contributors to each processor */ 767 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 768 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 769 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 770 j = 0; 771 for (i=0; i<N; i++) { 772 if (lastidx > (idx = rows[i])) j = 0; 773 lastidx = idx; 774 for (; j<size; j++) { 775 if (idx >= owners[j] && idx < owners[j+1]) { 776 nprocs[2*j]++; 777 nprocs[2*j+1] = 1; 778 owner[i] = j; 779 #if defined(PETSC_DEBUG) 780 found = PETSC_TRUE; 781 #endif 782 break; 783 } 784 } 785 #if defined(PETSC_DEBUG) 786 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 787 found = PETSC_FALSE; 788 #endif 789 } 790 nsends = 0; 791 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 792 793 if (A->nooffproczerorows) { 794 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 795 nrecvs = nsends; 796 nmax = N; 797 } else { 798 /* inform other processors of number of messages and max length*/ 799 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 800 } 801 802 /* post receives: */ 803 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 804 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 805 for (i=0; i<nrecvs; i++) { 806 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 807 } 808 809 /* do sends: 810 1) starts[i] gives the starting index in svalues for stuff going to 811 the ith processor 812 */ 813 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 814 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 815 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 816 817 starts[0] = 0; 818 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 819 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 820 821 starts[0] = 0; 822 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 823 count = 0; 824 for (i=0; i<size; i++) { 825 if (nprocs[2*i+1]) { 826 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 827 } 828 } 829 ierr = PetscFree(starts);CHKERRQ(ierr); 830 831 base = owners[rank]; 832 833 /* wait on receives */ 834 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 835 count = nrecvs; slen = 0; 836 while (count) { 837 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 838 /* unpack receives into our local space */ 839 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 840 841 source[imdex] = recv_status.MPI_SOURCE; 842 lens[imdex] = n; 843 slen += n; 844 count--; 845 } 846 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 847 848 /* move the data into the send scatter */ 849 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 850 count = 0; 851 for (i=0; i<nrecvs; i++) { 852 values = rvalues + i*nmax; 853 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 854 } 855 ierr = PetscFree(rvalues);CHKERRQ(ierr); 856 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 857 ierr = PetscFree(owner);CHKERRQ(ierr); 858 ierr = PetscFree(nprocs);CHKERRQ(ierr); 859 860 /* fix right hand side if needed */ 861 if (x && b) { 862 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 863 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 864 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 865 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 866 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 867 } 868 /* 869 Zero the required rows. If the "diagonal block" of the matrix 870 is square and the user wishes to set the diagonal we use separate 871 code so that MatSetValues() is not called for each diagonal allocating 872 new memory, thus calling lots of mallocs and slowing things down. 873 874 */ 875 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 876 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 877 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 878 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 879 } else if (diag != 0.0) { 880 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 881 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 882 for (i = 0; i < slen; i++) { 883 row = lrows[i] + rstart; 884 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 885 } 886 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 887 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 } else { 889 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 890 } 891 ierr = PetscFree(lrows);CHKERRQ(ierr); 892 893 /* wait on sends */ 894 if (nsends) { 895 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 896 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 897 ierr = PetscFree(send_status);CHKERRQ(ierr); 898 } 899 ierr = PetscFree(send_waits);CHKERRQ(ierr); 900 ierr = PetscFree(svalues);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 907 { 908 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 909 PetscErrorCode ierr; 910 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 911 PetscInt i,*owners = A->rmap->range; 912 PetscInt *nprocs,j,idx,nsends; 913 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 914 PetscInt *rvalues,count,base,slen,*source; 915 PetscInt *lens,*lrows,*values,m; 916 MPI_Comm comm; 917 MPI_Request *send_waits,*recv_waits; 918 MPI_Status recv_status,*send_status; 919 const PetscScalar *xx; 920 PetscScalar *bb,*mask; 921 Vec xmask,lmask; 922 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 923 const PetscInt *aj, *ii,*ridx; 924 PetscScalar *aa; 925 #if defined(PETSC_DEBUG) 926 PetscBool found = PETSC_FALSE; 927 #endif 928 929 PetscFunctionBegin; 930 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 931 /* first count number of contributors to each processor */ 932 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 933 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 934 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 935 j = 0; 936 for (i=0; i<N; i++) { 937 if (lastidx > (idx = rows[i])) j = 0; 938 lastidx = idx; 939 for (; j<size; j++) { 940 if (idx >= owners[j] && idx < owners[j+1]) { 941 nprocs[2*j]++; 942 nprocs[2*j+1] = 1; 943 owner[i] = j; 944 #if defined(PETSC_DEBUG) 945 found = PETSC_TRUE; 946 #endif 947 break; 948 } 949 } 950 #if defined(PETSC_DEBUG) 951 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 952 found = PETSC_FALSE; 953 #endif 954 } 955 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 956 957 /* inform other processors of number of messages and max length*/ 958 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 959 960 /* post receives: */ 961 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 962 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 963 for (i=0; i<nrecvs; i++) { 964 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 965 } 966 967 /* do sends: 968 1) starts[i] gives the starting index in svalues for stuff going to 969 the ith processor 970 */ 971 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 972 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 973 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 974 975 starts[0] = 0; 976 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 977 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 978 979 starts[0] = 0; 980 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 981 count = 0; 982 for (i=0; i<size; i++) { 983 if (nprocs[2*i+1]) { 984 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 985 } 986 } 987 ierr = PetscFree(starts);CHKERRQ(ierr); 988 989 base = owners[rank]; 990 991 /* wait on receives */ 992 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 993 count = nrecvs; slen = 0; 994 while (count) { 995 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 996 /* unpack receives into our local space */ 997 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 998 999 source[imdex] = recv_status.MPI_SOURCE; 1000 lens[imdex] = n; 1001 slen += n; 1002 count--; 1003 } 1004 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1005 1006 /* move the data into the send scatter */ 1007 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1008 count = 0; 1009 for (i=0; i<nrecvs; i++) { 1010 values = rvalues + i*nmax; 1011 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1012 } 1013 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1014 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1015 ierr = PetscFree(owner);CHKERRQ(ierr); 1016 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1017 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1018 1019 /* zero diagonal part of matrix */ 1020 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1021 1022 /* handle off diagonal part of matrix */ 1023 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1024 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1025 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1026 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1027 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1028 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1031 if (x) { 1032 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1035 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1036 } 1037 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1038 1039 /* remove zeroed rows of off diagonal matrix */ 1040 ii = aij->i; 1041 for (i=0; i<slen; i++) { 1042 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1043 } 1044 1045 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1046 if (aij->compressedrow.use) { 1047 m = aij->compressedrow.nrows; 1048 ii = aij->compressedrow.i; 1049 ridx = aij->compressedrow.rindex; 1050 for (i=0; i<m; i++) { 1051 n = ii[i+1] - ii[i]; 1052 aj = aij->j + ii[i]; 1053 aa = aij->a + ii[i]; 1054 1055 for (j=0; j<n; j++) { 1056 if (PetscAbsScalar(mask[*aj])) { 1057 if (b) bb[*ridx] -= *aa*xx[*aj]; 1058 *aa = 0.0; 1059 } 1060 aa++; 1061 aj++; 1062 } 1063 ridx++; 1064 } 1065 } else { /* do not use compressed row format */ 1066 m = l->B->rmap->n; 1067 for (i=0; i<m; i++) { 1068 n = ii[i+1] - ii[i]; 1069 aj = aij->j + ii[i]; 1070 aa = aij->a + ii[i]; 1071 for (j=0; j<n; j++) { 1072 if (PetscAbsScalar(mask[*aj])) { 1073 if (b) bb[i] -= *aa*xx[*aj]; 1074 *aa = 0.0; 1075 } 1076 aa++; 1077 aj++; 1078 } 1079 } 1080 } 1081 if (x) { 1082 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1083 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1084 } 1085 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1086 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1087 ierr = PetscFree(lrows);CHKERRQ(ierr); 1088 1089 /* wait on sends */ 1090 if (nsends) { 1091 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1092 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1093 ierr = PetscFree(send_status);CHKERRQ(ierr); 1094 } 1095 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1096 ierr = PetscFree(svalues);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMult_MPIAIJ" 1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1103 { 1104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 PetscInt nt; 1107 1108 PetscFunctionBegin; 1109 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1110 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 PetscBool merged; 1152 1153 PetscFunctionBegin; 1154 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1155 /* do nondiagonal part */ 1156 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1157 if (!merged) { 1158 /* send it on its way */ 1159 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1160 /* do local part */ 1161 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1162 /* receive remote parts: note this assumes the values are not actually */ 1163 /* added in yy until the next line, */ 1164 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 } else { 1166 /* do local part */ 1167 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1168 /* send it on its way */ 1169 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1170 /* values actually were received in the Begin() but we need to call this nop */ 1171 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1178 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1179 { 1180 MPI_Comm comm; 1181 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1182 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1183 IS Me,Notme; 1184 PetscErrorCode ierr; 1185 PetscInt M,N,first,last,*notme,i; 1186 PetscMPIInt size; 1187 1188 PetscFunctionBegin; 1189 /* Easy test: symmetric diagonal block */ 1190 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1191 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1192 if (!*f) PetscFunctionReturn(0); 1193 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1194 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1195 if (size == 1) PetscFunctionReturn(0); 1196 1197 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1198 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1199 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1200 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1201 for (i=0; i<first; i++) notme[i] = i; 1202 for (i=last; i<M; i++) notme[i-last+first] = i; 1203 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1204 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1205 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1206 Aoff = Aoffs[0]; 1207 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1208 Boff = Boffs[0]; 1209 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1210 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1211 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1212 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1213 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1214 ierr = PetscFree(notme);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 #undef __FUNCT__ 1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1221 { 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1223 PetscErrorCode ierr; 1224 1225 PetscFunctionBegin; 1226 /* do nondiagonal part */ 1227 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1228 /* send it on its way */ 1229 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1230 /* do local part */ 1231 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1232 /* receive remote parts */ 1233 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 /* 1238 This only works correctly for square matrices where the subblock A->A is the 1239 diagonal block 1240 */ 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1244 { 1245 PetscErrorCode ierr; 1246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1247 1248 PetscFunctionBegin; 1249 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1250 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1251 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatScale_MPIAIJ" 1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1258 { 1259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1260 PetscErrorCode ierr; 1261 1262 PetscFunctionBegin; 1263 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1264 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatDestroy_MPIAIJ" 1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1271 { 1272 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 #if defined(PETSC_USE_LOG) 1277 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1278 #endif 1279 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1280 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1281 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1282 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1283 #if defined(PETSC_USE_CTABLE) 1284 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1285 #else 1286 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1287 #endif 1288 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1289 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1290 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1291 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1292 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1293 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1294 1295 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1310 { 1311 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1312 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1313 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1314 PetscErrorCode ierr; 1315 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1316 int fd; 1317 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1318 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1319 PetscScalar *column_values; 1320 PetscInt message_count,flowcontrolcount; 1321 FILE *file; 1322 1323 PetscFunctionBegin; 1324 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1325 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1326 nz = A->nz + B->nz; 1327 if (!rank) { 1328 header[0] = MAT_FILE_CLASSID; 1329 header[1] = mat->rmap->N; 1330 header[2] = mat->cmap->N; 1331 1332 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1333 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1334 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 /* get largest number of rows any processor has */ 1336 rlen = mat->rmap->n; 1337 range = mat->rmap->range; 1338 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1339 } else { 1340 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1341 rlen = mat->rmap->n; 1342 } 1343 1344 /* load up the local row counts */ 1345 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1346 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1347 1348 /* store the row lengths to the file */ 1349 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1350 if (!rank) { 1351 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1352 for (i=1; i<size; i++) { 1353 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1354 rlen = range[i+1] - range[i]; 1355 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1356 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1357 } 1358 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1359 } else { 1360 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1361 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1362 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1363 } 1364 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1365 1366 /* load up the local column indices */ 1367 nzmax = nz; /* th processor needs space a largest processor needs */ 1368 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1369 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1370 cnt = 0; 1371 for (i=0; i<mat->rmap->n; i++) { 1372 for (j=B->i[i]; j<B->i[i+1]; j++) { 1373 if ((col = garray[B->j[j]]) > cstart) break; 1374 column_indices[cnt++] = col; 1375 } 1376 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1377 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1378 } 1379 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1380 1381 /* store the column indices to the file */ 1382 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1383 if (!rank) { 1384 MPI_Status status; 1385 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1386 for (i=1; i<size; i++) { 1387 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1388 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1389 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1390 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1391 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1392 } 1393 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1394 } else { 1395 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1396 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1397 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1398 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1399 } 1400 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1401 1402 /* load up the local column values */ 1403 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1404 cnt = 0; 1405 for (i=0; i<mat->rmap->n; i++) { 1406 for (j=B->i[i]; j<B->i[i+1]; j++) { 1407 if (garray[B->j[j]] > cstart) break; 1408 column_values[cnt++] = B->a[j]; 1409 } 1410 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1411 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1412 } 1413 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1414 1415 /* store the column values to the file */ 1416 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1417 if (!rank) { 1418 MPI_Status status; 1419 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1420 for (i=1; i<size; i++) { 1421 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1422 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1423 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1424 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1425 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1426 } 1427 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1428 } else { 1429 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1430 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1431 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1432 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1433 } 1434 ierr = PetscFree(column_values);CHKERRQ(ierr); 1435 1436 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1437 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #include <petscdraw.h> 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1445 { 1446 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1447 PetscErrorCode ierr; 1448 PetscMPIInt rank = aij->rank,size = aij->size; 1449 PetscBool isdraw,iascii,isbinary; 1450 PetscViewer sviewer; 1451 PetscViewerFormat format; 1452 1453 PetscFunctionBegin; 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1457 if (iascii) { 1458 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1459 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1460 MatInfo info; 1461 PetscBool inodes; 1462 1463 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1464 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1465 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1467 if (!inodes) { 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1469 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1472 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1473 } 1474 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1475 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1476 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1478 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1481 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1484 PetscInt inodecount,inodelimit,*inodes; 1485 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1486 if (inodes) { 1487 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1488 } else { 1489 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1490 } 1491 PetscFunctionReturn(0); 1492 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1493 PetscFunctionReturn(0); 1494 } 1495 } else if (isbinary) { 1496 if (size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 } else { 1500 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } else if (isdraw) { 1504 PetscDraw draw; 1505 PetscBool isnull; 1506 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1507 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1508 } 1509 1510 if (size == 1) { 1511 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1512 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1513 } else { 1514 /* assemble the entire matrix onto first processor. */ 1515 Mat A; 1516 Mat_SeqAIJ *Aloc; 1517 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1518 MatScalar *a; 1519 1520 if (mat->rmap->N > 1024) { 1521 PetscBool flg = PETSC_FALSE; 1522 1523 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1524 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1525 } 1526 1527 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1528 if (!rank) { 1529 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1530 } else { 1531 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1532 } 1533 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1534 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1535 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1536 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1537 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1538 1539 /* copy over the A part */ 1540 Aloc = (Mat_SeqAIJ*)aij->A->data; 1541 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 row = mat->rmap->rstart; 1543 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1544 for (i=0; i<m; i++) { 1545 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1546 row++; 1547 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1548 } 1549 aj = Aloc->j; 1550 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1551 1552 /* copy over the B part */ 1553 Aloc = (Mat_SeqAIJ*)aij->B->data; 1554 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1555 row = mat->rmap->rstart; 1556 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1557 ct = cols; 1558 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1559 for (i=0; i<m; i++) { 1560 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1561 row++; 1562 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1563 } 1564 ierr = PetscFree(ct);CHKERRQ(ierr); 1565 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 /* 1568 Everyone has to call to draw the matrix since the graphics waits are 1569 synchronized across all processors that share the PetscDraw object 1570 */ 1571 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1572 if (!rank) { 1573 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1574 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1575 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1576 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1577 } 1578 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1579 ierr = MatDestroy(&A);CHKERRQ(ierr); 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatView_MPIAIJ" 1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1587 { 1588 PetscErrorCode ierr; 1589 PetscBool iascii,isdraw,issocket,isbinary; 1590 1591 PetscFunctionBegin; 1592 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1596 if (iascii || isdraw || isbinary || issocket) { 1597 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatSOR_MPIAIJ" 1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1605 { 1606 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1607 PetscErrorCode ierr; 1608 Vec bb1 = 0; 1609 PetscBool hasop; 1610 1611 PetscFunctionBegin; 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1618 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1619 } 1620 1621 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1622 if (flag & SOR_ZERO_INITIAL_GUESS) { 1623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1624 its--; 1625 } 1626 1627 while (its--) { 1628 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 1631 /* update rhs: bb1 = bb - B*x */ 1632 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1633 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1634 1635 /* local sweep */ 1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1637 } 1638 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1639 if (flag & SOR_ZERO_INITIAL_GUESS) { 1640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1641 its--; 1642 } 1643 while (its--) { 1644 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 1647 /* update rhs: bb1 = bb - B*x */ 1648 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1649 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1650 1651 /* local sweep */ 1652 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1653 } 1654 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1655 if (flag & SOR_ZERO_INITIAL_GUESS) { 1656 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1657 its--; 1658 } 1659 while (its--) { 1660 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 1663 /* update rhs: bb1 = bb - B*x */ 1664 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1665 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1666 1667 /* local sweep */ 1668 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1669 } 1670 } else if (flag & SOR_EISENSTAT) { 1671 Vec xx1; 1672 1673 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1675 1676 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 if (!mat->diag) { 1679 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1680 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1681 } 1682 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1683 if (hasop) { 1684 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1685 } else { 1686 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1687 } 1688 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1689 1690 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1691 1692 /* local sweep */ 1693 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1694 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1695 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1696 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1697 1698 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatPermute_MPIAIJ" 1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1705 { 1706 Mat aA,aB,Aperm; 1707 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1708 PetscScalar *aa,*ba; 1709 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1710 PetscSF rowsf,sf; 1711 IS parcolp = NULL; 1712 PetscBool done; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1717 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1718 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1719 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1720 1721 /* Invert row permutation to find out where my rows should go */ 1722 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1723 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1724 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1725 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1726 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1727 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1728 1729 /* Invert column permutation to find out where my columns should go */ 1730 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1731 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1732 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1733 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1734 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1737 1738 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1739 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1740 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1741 1742 /* Find out where my gcols should go */ 1743 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1744 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1745 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1746 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1747 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1748 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1751 1752 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1753 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1757 for (i=0; i<m; i++) { 1758 PetscInt row = rdest[i],rowner; 1759 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1760 for (j=ai[i]; j<ai[i+1]; j++) { 1761 PetscInt cowner,col = cdest[aj[j]]; 1762 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1763 if (rowner == cowner) dnnz[i]++; 1764 else onnz[i]++; 1765 } 1766 for (j=bi[i]; j<bi[i+1]; j++) { 1767 PetscInt cowner,col = gcdest[bj[j]]; 1768 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1769 if (rowner == cowner) dnnz[i]++; 1770 else onnz[i]++; 1771 } 1772 } 1773 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1780 1781 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1784 for (i=0; i<m; i++) { 1785 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1786 PetscInt rowlen; 1787 rowlen = ai[i+1] - ai[i]; 1788 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1789 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1790 rowlen = bi[i+1] - bi[i]; 1791 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1792 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1793 } 1794 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1800 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1801 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1802 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1803 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1804 *B = Aperm; 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1811 { 1812 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1813 Mat A = mat->A,B = mat->B; 1814 PetscErrorCode ierr; 1815 PetscReal isend[5],irecv[5]; 1816 1817 PetscFunctionBegin; 1818 info->block_size = 1.0; 1819 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1820 1821 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1822 isend[3] = info->memory; isend[4] = info->mallocs; 1823 1824 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1825 1826 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1827 isend[3] += info->memory; isend[4] += info->mallocs; 1828 if (flag == MAT_LOCAL) { 1829 info->nz_used = isend[0]; 1830 info->nz_allocated = isend[1]; 1831 info->nz_unneeded = isend[2]; 1832 info->memory = isend[3]; 1833 info->mallocs = isend[4]; 1834 } else if (flag == MAT_GLOBAL_MAX) { 1835 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1836 1837 info->nz_used = irecv[0]; 1838 info->nz_allocated = irecv[1]; 1839 info->nz_unneeded = irecv[2]; 1840 info->memory = irecv[3]; 1841 info->mallocs = irecv[4]; 1842 } else if (flag == MAT_GLOBAL_SUM) { 1843 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1844 1845 info->nz_used = irecv[0]; 1846 info->nz_allocated = irecv[1]; 1847 info->nz_unneeded = irecv[2]; 1848 info->memory = irecv[3]; 1849 info->mallocs = irecv[4]; 1850 } 1851 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1852 info->fill_ratio_needed = 0; 1853 info->factor_mallocs = 0; 1854 PetscFunctionReturn(0); 1855 } 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatSetOption_MPIAIJ" 1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1860 { 1861 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 switch (op) { 1866 case MAT_NEW_NONZERO_LOCATIONS: 1867 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1868 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1869 case MAT_KEEP_NONZERO_PATTERN: 1870 case MAT_NEW_NONZERO_LOCATION_ERR: 1871 case MAT_USE_INODES: 1872 case MAT_IGNORE_ZERO_ENTRIES: 1873 MatCheckPreallocated(A,1); 1874 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1875 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1876 break; 1877 case MAT_ROW_ORIENTED: 1878 a->roworiented = flg; 1879 1880 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1881 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1882 break; 1883 case MAT_NEW_DIAGONALS: 1884 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1885 break; 1886 case MAT_IGNORE_OFF_PROC_ENTRIES: 1887 a->donotstash = flg; 1888 break; 1889 case MAT_SPD: 1890 A->spd_set = PETSC_TRUE; 1891 A->spd = flg; 1892 if (flg) { 1893 A->symmetric = PETSC_TRUE; 1894 A->structurally_symmetric = PETSC_TRUE; 1895 A->symmetric_set = PETSC_TRUE; 1896 A->structurally_symmetric_set = PETSC_TRUE; 1897 } 1898 break; 1899 case MAT_SYMMETRIC: 1900 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1901 break; 1902 case MAT_STRUCTURALLY_SYMMETRIC: 1903 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1904 break; 1905 case MAT_HERMITIAN: 1906 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1907 break; 1908 case MAT_SYMMETRY_ETERNAL: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 default: 1912 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatGetRow_MPIAIJ" 1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1920 { 1921 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1922 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1923 PetscErrorCode ierr; 1924 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1925 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1926 PetscInt *cmap,*idx_p; 1927 1928 PetscFunctionBegin; 1929 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1930 mat->getrowactive = PETSC_TRUE; 1931 1932 if (!mat->rowvalues && (idx || v)) { 1933 /* 1934 allocate enough space to hold information from the longest row. 1935 */ 1936 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1937 PetscInt max = 1,tmp; 1938 for (i=0; i<matin->rmap->n; i++) { 1939 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1940 if (max < tmp) max = tmp; 1941 } 1942 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1943 } 1944 1945 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1946 lrow = row - rstart; 1947 1948 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1949 if (!v) {pvA = 0; pvB = 0;} 1950 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1951 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1952 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1953 nztot = nzA + nzB; 1954 1955 cmap = mat->garray; 1956 if (v || idx) { 1957 if (nztot) { 1958 /* Sort by increasing column numbers, assuming A and B already sorted */ 1959 PetscInt imark = -1; 1960 if (v) { 1961 *v = v_p = mat->rowvalues; 1962 for (i=0; i<nzB; i++) { 1963 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1964 else break; 1965 } 1966 imark = i; 1967 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1968 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1969 } 1970 if (idx) { 1971 *idx = idx_p = mat->rowindices; 1972 if (imark > -1) { 1973 for (i=0; i<imark; i++) { 1974 idx_p[i] = cmap[cworkB[i]]; 1975 } 1976 } else { 1977 for (i=0; i<nzB; i++) { 1978 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1979 else break; 1980 } 1981 imark = i; 1982 } 1983 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1984 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1985 } 1986 } else { 1987 if (idx) *idx = 0; 1988 if (v) *v = 0; 1989 } 1990 } 1991 *nz = nztot; 1992 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1993 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2000 { 2001 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2002 2003 PetscFunctionBegin; 2004 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2005 aij->getrowactive = PETSC_FALSE; 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatNorm_MPIAIJ" 2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2012 { 2013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2014 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2015 PetscErrorCode ierr; 2016 PetscInt i,j,cstart = mat->cmap->rstart; 2017 PetscReal sum = 0.0; 2018 MatScalar *v; 2019 2020 PetscFunctionBegin; 2021 if (aij->size == 1) { 2022 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2023 } else { 2024 if (type == NORM_FROBENIUS) { 2025 v = amat->a; 2026 for (i=0; i<amat->nz; i++) { 2027 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2028 } 2029 v = bmat->a; 2030 for (i=0; i<bmat->nz; i++) { 2031 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2032 } 2033 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2034 *norm = PetscSqrtReal(*norm); 2035 } else if (type == NORM_1) { /* max column norm */ 2036 PetscReal *tmp,*tmp2; 2037 PetscInt *jj,*garray = aij->garray; 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2040 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2041 *norm = 0.0; 2042 v = amat->a; jj = amat->j; 2043 for (j=0; j<amat->nz; j++) { 2044 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2045 } 2046 v = bmat->a; jj = bmat->j; 2047 for (j=0; j<bmat->nz; j++) { 2048 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2049 } 2050 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2051 for (j=0; j<mat->cmap->N; j++) { 2052 if (tmp2[j] > *norm) *norm = tmp2[j]; 2053 } 2054 ierr = PetscFree(tmp);CHKERRQ(ierr); 2055 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2056 } else if (type == NORM_INFINITY) { /* max row norm */ 2057 PetscReal ntemp = 0.0; 2058 for (j=0; j<aij->A->rmap->n; j++) { 2059 v = amat->a + amat->i[j]; 2060 sum = 0.0; 2061 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2062 sum += PetscAbsScalar(*v); v++; 2063 } 2064 v = bmat->a + bmat->i[j]; 2065 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2066 sum += PetscAbsScalar(*v); v++; 2067 } 2068 if (sum > ntemp) ntemp = sum; 2069 } 2070 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2071 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2072 } 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatTranspose_MPIAIJ" 2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2079 { 2080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2081 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2082 PetscErrorCode ierr; 2083 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2084 PetscInt cstart = A->cmap->rstart,ncol; 2085 Mat B; 2086 MatScalar *array; 2087 2088 PetscFunctionBegin; 2089 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2090 2091 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2092 ai = Aloc->i; aj = Aloc->j; 2093 bi = Bloc->i; bj = Bloc->j; 2094 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2095 PetscInt *d_nnz,*g_nnz,*o_nnz; 2096 PetscSFNode *oloc; 2097 PETSC_UNUSED PetscSF sf; 2098 2099 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2100 /* compute d_nnz for preallocation */ 2101 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2102 for (i=0; i<ai[ma]; i++) { 2103 d_nnz[aj[i]]++; 2104 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2105 } 2106 /* compute local off-diagonal contributions */ 2107 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2108 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2109 /* map those to global */ 2110 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2111 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2112 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2113 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2114 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2117 2118 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2119 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2120 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2121 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2122 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2123 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2124 } else { 2125 B = *matout; 2126 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2127 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2128 } 2129 2130 /* copy over the A part */ 2131 array = Aloc->a; 2132 row = A->rmap->rstart; 2133 for (i=0; i<ma; i++) { 2134 ncol = ai[i+1]-ai[i]; 2135 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2136 row++; 2137 array += ncol; aj += ncol; 2138 } 2139 aj = Aloc->j; 2140 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2141 2142 /* copy over the B part */ 2143 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2144 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2145 array = Bloc->a; 2146 row = A->rmap->rstart; 2147 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2148 cols_tmp = cols; 2149 for (i=0; i<mb; i++) { 2150 ncol = bi[i+1]-bi[i]; 2151 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2152 row++; 2153 array += ncol; cols_tmp += ncol; 2154 } 2155 ierr = PetscFree(cols);CHKERRQ(ierr); 2156 2157 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2160 *matout = B; 2161 } else { 2162 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2170 { 2171 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2172 Mat a = aij->A,b = aij->B; 2173 PetscErrorCode ierr; 2174 PetscInt s1,s2,s3; 2175 2176 PetscFunctionBegin; 2177 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2178 if (rr) { 2179 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2180 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2181 /* Overlap communication with computation. */ 2182 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2183 } 2184 if (ll) { 2185 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2186 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2187 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2188 } 2189 /* scale the diagonal block */ 2190 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2191 2192 if (rr) { 2193 /* Do a scatter end and then right scale the off-diagonal block */ 2194 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2196 } 2197 PetscFunctionReturn(0); 2198 } 2199 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2203 { 2204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatEqual_MPIAIJ" 2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2215 { 2216 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2217 Mat a,b,c,d; 2218 PetscBool flg; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 a = matA->A; b = matA->B; 2223 c = matB->A; d = matB->B; 2224 2225 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2226 if (flg) { 2227 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2228 } 2229 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatCopy_MPIAIJ" 2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2236 { 2237 PetscErrorCode ierr; 2238 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2239 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2240 2241 PetscFunctionBegin; 2242 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2243 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2244 /* because of the column compression in the off-processor part of the matrix a->B, 2245 the number of columns in a->B and b->B may be different, hence we cannot call 2246 the MatCopy() directly on the two parts. If need be, we can provide a more 2247 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2248 then copying the submatrices */ 2249 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2250 } else { 2251 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2252 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2253 } 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatSetUp_MPIAIJ" 2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2260 { 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2272 { 2273 PetscInt i,m=Y->rmap->N; 2274 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2275 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2276 const PetscInt *xi = x->i,*yi = y->i; 2277 2278 PetscFunctionBegin; 2279 /* Set the number of nonzeros in the new matrix */ 2280 for (i=0; i<m; i++) { 2281 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2282 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2283 nnz[i] = 0; 2284 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2285 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2286 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2287 nnz[i]++; 2288 } 2289 for (; k<nzy; k++) nnz[i]++; 2290 } 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatAXPY_MPIAIJ" 2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2297 { 2298 PetscErrorCode ierr; 2299 PetscInt i; 2300 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2301 PetscBLASInt bnz,one=1; 2302 Mat_SeqAIJ *x,*y; 2303 2304 PetscFunctionBegin; 2305 if (str == SAME_NONZERO_PATTERN) { 2306 PetscScalar alpha = a; 2307 x = (Mat_SeqAIJ*)xx->A->data; 2308 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2309 y = (Mat_SeqAIJ*)yy->A->data; 2310 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2311 x = (Mat_SeqAIJ*)xx->B->data; 2312 y = (Mat_SeqAIJ*)yy->B->data; 2313 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2314 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2315 } else if (str == SUBSET_NONZERO_PATTERN) { 2316 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2317 2318 x = (Mat_SeqAIJ*)xx->B->data; 2319 y = (Mat_SeqAIJ*)yy->B->data; 2320 if (y->xtoy && y->XtoY != xx->B) { 2321 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2322 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2323 } 2324 if (!y->xtoy) { /* get xtoy */ 2325 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2326 y->XtoY = xx->B; 2327 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2328 } 2329 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2330 } else { 2331 Mat B; 2332 PetscInt *nnz_d,*nnz_o; 2333 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2334 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2335 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2336 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2337 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2338 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2339 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2341 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2342 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2343 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2344 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2347 } 2348 PetscFunctionReturn(0); 2349 } 2350 2351 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatConjugate_MPIAIJ" 2355 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2356 { 2357 #if defined(PETSC_USE_COMPLEX) 2358 PetscErrorCode ierr; 2359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2360 2361 PetscFunctionBegin; 2362 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2363 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2364 #else 2365 PetscFunctionBegin; 2366 #endif 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatRealPart_MPIAIJ" 2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2373 { 2374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2375 PetscErrorCode ierr; 2376 2377 PetscFunctionBegin; 2378 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2379 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2386 { 2387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2392 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #if defined(PETSC_HAVE_PBGL) 2397 2398 #include <boost/parallel/mpi/bsp_process_group.hpp> 2399 #include <boost/graph/distributed/ilu_default_graph.hpp> 2400 #include <boost/graph/distributed/ilu_0_block.hpp> 2401 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2402 #include <boost/graph/distributed/petsc/interface.hpp> 2403 #include <boost/multi_array.hpp> 2404 #include <boost/parallel/distributed_property_map->hpp> 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2408 /* 2409 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2410 */ 2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2412 { 2413 namespace petsc = boost::distributed::petsc; 2414 2415 namespace graph_dist = boost::graph::distributed; 2416 using boost::graph::distributed::ilu_default::process_group_type; 2417 using boost::graph::ilu_permuted; 2418 2419 PetscBool row_identity, col_identity; 2420 PetscContainer c; 2421 PetscInt m, n, M, N; 2422 PetscErrorCode ierr; 2423 2424 PetscFunctionBegin; 2425 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2426 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2427 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2428 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2429 2430 process_group_type pg; 2431 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2432 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2433 lgraph_type& level_graph = *lgraph_p; 2434 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2435 2436 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2437 ilu_permuted(level_graph); 2438 2439 /* put together the new matrix */ 2440 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2441 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2442 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2443 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2444 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2445 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2446 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 2449 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2450 ierr = PetscContainerSetPointer(c, lgraph_p); 2451 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2452 ierr = PetscContainerDestroy(&c); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2459 { 2460 PetscFunctionBegin; 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatSolve_MPIAIJ" 2466 /* 2467 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2468 */ 2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2470 { 2471 namespace graph_dist = boost::graph::distributed; 2472 2473 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2474 lgraph_type *lgraph_p; 2475 PetscContainer c; 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2480 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2481 ierr = VecCopy(b, x);CHKERRQ(ierr); 2482 2483 PetscScalar *array_x; 2484 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2485 PetscInt sx; 2486 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2487 2488 PetscScalar *array_b; 2489 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2490 PetscInt sb; 2491 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2492 2493 lgraph_type& level_graph = *lgraph_p; 2494 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2495 2496 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2497 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2498 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2499 2500 typedef boost::iterator_property_map<array_ref_type::iterator, 2501 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2502 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2503 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2504 2505 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2506 PetscFunctionReturn(0); 2507 } 2508 #endif 2509 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "MatDestroy_MatRedundant" 2512 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2513 { 2514 PetscErrorCode ierr; 2515 Mat_Redundant *redund; 2516 PetscInt i; 2517 PetscMPIInt size; 2518 2519 PetscFunctionBegin; 2520 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2521 if (size == 1) { 2522 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2523 redund = a->redundant; 2524 } else { 2525 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2526 redund = a->redundant; 2527 } 2528 if (redund){ 2529 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2530 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2531 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2532 for (i=0; i<redund->nrecvs; i++) { 2533 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2534 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2535 } 2536 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2537 ierr = redund->Destroy(A);CHKERRQ(ierr); 2538 ierr = PetscFree(redund);CHKERRQ(ierr); 2539 } 2540 PetscFunctionReturn(0); 2541 } 2542 2543 #undef __FUNCT__ 2544 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2545 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant) 2546 { 2547 PetscMPIInt rank,size; 2548 MPI_Comm comm,subcomm=psubcomm->comm; 2549 PetscErrorCode ierr; 2550 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2551 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2552 PetscInt *rowrange = mat->rmap->range; 2553 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2554 Mat A = aij->A,B=aij->B,C=*matredundant; 2555 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2556 PetscScalar *sbuf_a; 2557 PetscInt nzlocal=a->nz+b->nz; 2558 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2559 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2560 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2561 MatScalar *aworkA,*aworkB; 2562 PetscScalar *vals; 2563 PetscMPIInt tag1,tag2,tag3,imdex; 2564 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2565 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2566 MPI_Status recv_status,*send_status; 2567 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2568 PetscInt **rbuf_j=NULL; 2569 PetscScalar **rbuf_a=NULL; 2570 Mat_Redundant *redund =NULL; 2571 PetscInt rstart_sub,rend_sub,mloc_sub; 2572 2573 PetscFunctionBegin; 2574 if (psubcomm->type != PETSC_SUBCOMM_INTERLACED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMETYPE,"Input psubcomm must be PETSC_SUBCOMM_INTERLACED type"); 2575 ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr); 2576 if (M != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-square matrix yet, rows %D columns %D",M,N); 2577 2578 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2579 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2580 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2581 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2582 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2583 2584 if (reuse == MAT_REUSE_MATRIX) { 2585 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2586 if (subsize == 1) { 2587 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2588 redund = c->redundant; 2589 } else { 2590 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2591 redund = c->redundant; 2592 } 2593 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2594 2595 nsends = redund->nsends; 2596 nrecvs = redund->nrecvs; 2597 send_rank = redund->send_rank; 2598 recv_rank = redund->recv_rank; 2599 sbuf_nz = redund->sbuf_nz; 2600 rbuf_nz = redund->rbuf_nz; 2601 sbuf_j = redund->sbuf_j; 2602 sbuf_a = redund->sbuf_a; 2603 rbuf_j = redund->rbuf_j; 2604 rbuf_a = redund->rbuf_a; 2605 } 2606 2607 if (reuse == MAT_INITIAL_MATRIX) { 2608 PetscInt nleftover,np_subcomm; 2609 2610 /* get local size of redundant matrix */ 2611 const PetscInt *range; 2612 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2613 rstart_sub = range[nsubcomm*subrank]; 2614 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2615 rend_sub = range[nsubcomm*(subrank+1)]; 2616 } else { 2617 rend_sub = mat->rmap->N; 2618 } 2619 mloc_sub = rend_sub - rstart_sub; 2620 2621 /* get the destination processors' id send_rank, nsends and nrecvs */ 2622 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2623 2624 np_subcomm = size/nsubcomm; 2625 nleftover = size - nsubcomm*np_subcomm; 2626 2627 nsends = 0; nrecvs = 0; 2628 for (i=0; i<size; i++) { 2629 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2630 send_rank[nsends] = i; nsends++; 2631 recv_rank[nrecvs++] = i; 2632 } 2633 } 2634 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2635 i = size-nleftover-1; 2636 j = 0; 2637 while (j < nsubcomm - nleftover) { 2638 send_rank[nsends++] = i; 2639 i--; j++; 2640 } 2641 } 2642 2643 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2644 for (i=0; i<nleftover; i++) { 2645 recv_rank[nrecvs++] = size-nleftover+i; 2646 } 2647 } 2648 2649 /* allocate sbuf_j, sbuf_a */ 2650 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2651 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2652 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2653 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2654 2655 /* copy mat's local entries into the buffers */ 2656 if (reuse == MAT_INITIAL_MATRIX) { 2657 rownz_max = 0; 2658 rptr = sbuf_j; 2659 cols = sbuf_j + rend-rstart + 1; 2660 vals = sbuf_a; 2661 rptr[0] = 0; 2662 for (i=0; i<rend-rstart; i++) { 2663 row = i + rstart; 2664 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2665 ncols = nzA + nzB; 2666 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2667 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2668 /* load the column indices for this row into cols */ 2669 lwrite = 0; 2670 for (l=0; l<nzB; l++) { 2671 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2672 vals[lwrite] = aworkB[l]; 2673 cols[lwrite++] = ctmp; 2674 } 2675 } 2676 for (l=0; l<nzA; l++) { 2677 vals[lwrite] = aworkA[l]; 2678 cols[lwrite++] = cstart + cworkA[l]; 2679 } 2680 for (l=0; l<nzB; l++) { 2681 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2682 vals[lwrite] = aworkB[l]; 2683 cols[lwrite++] = ctmp; 2684 } 2685 } 2686 vals += ncols; 2687 cols += ncols; 2688 rptr[i+1] = rptr[i] + ncols; 2689 if (rownz_max < ncols) rownz_max = ncols; 2690 } 2691 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2692 } else { /* only copy matrix values into sbuf_a */ 2693 rptr = sbuf_j; 2694 vals = sbuf_a; 2695 rptr[0] = 0; 2696 for (i=0; i<rend-rstart; i++) { 2697 row = i + rstart; 2698 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2699 ncols = nzA + nzB; 2700 cworkB = b->j + b->i[i]; 2701 aworkA = a->a + a->i[i]; 2702 aworkB = b->a + b->i[i]; 2703 lwrite = 0; 2704 for (l=0; l<nzB; l++) { 2705 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2706 } 2707 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2708 for (l=0; l<nzB; l++) { 2709 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2710 } 2711 vals += ncols; 2712 rptr[i+1] = rptr[i] + ncols; 2713 } 2714 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2715 2716 /* send nzlocal to others, and recv other's nzlocal */ 2717 /*--------------------------------------------------*/ 2718 if (reuse == MAT_INITIAL_MATRIX) { 2719 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2720 2721 s_waits2 = s_waits3 + nsends; 2722 s_waits1 = s_waits2 + nsends; 2723 r_waits1 = s_waits1 + nsends; 2724 r_waits2 = r_waits1 + nrecvs; 2725 r_waits3 = r_waits2 + nrecvs; 2726 } else { 2727 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2728 2729 r_waits3 = s_waits3 + nsends; 2730 } 2731 2732 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2733 if (reuse == MAT_INITIAL_MATRIX) { 2734 /* get new tags to keep the communication clean */ 2735 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2736 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2737 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2738 2739 /* post receives of other's nzlocal */ 2740 for (i=0; i<nrecvs; i++) { 2741 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2742 } 2743 /* send nzlocal to others */ 2744 for (i=0; i<nsends; i++) { 2745 sbuf_nz[i] = nzlocal; 2746 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2747 } 2748 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2749 count = nrecvs; 2750 while (count) { 2751 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2752 2753 recv_rank[imdex] = recv_status.MPI_SOURCE; 2754 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2755 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2756 2757 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2758 2759 rbuf_nz[imdex] += i + 2; 2760 2761 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2762 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2763 count--; 2764 } 2765 /* wait on sends of nzlocal */ 2766 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2767 /* send mat->i,j to others, and recv from other's */ 2768 /*------------------------------------------------*/ 2769 for (i=0; i<nsends; i++) { 2770 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2771 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2772 } 2773 /* wait on receives of mat->i,j */ 2774 /*------------------------------*/ 2775 count = nrecvs; 2776 while (count) { 2777 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2778 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2779 count--; 2780 } 2781 /* wait on sends of mat->i,j */ 2782 /*---------------------------*/ 2783 if (nsends) { 2784 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2785 } 2786 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2787 2788 /* post receives, send and receive mat->a */ 2789 /*----------------------------------------*/ 2790 for (imdex=0; imdex<nrecvs; imdex++) { 2791 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2792 } 2793 for (i=0; i<nsends; i++) { 2794 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2795 } 2796 count = nrecvs; 2797 while (count) { 2798 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2799 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2800 count--; 2801 } 2802 if (nsends) { 2803 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2804 } 2805 2806 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2807 2808 /* create redundant matrix */ 2809 /*-------------------------*/ 2810 if (reuse == MAT_INITIAL_MATRIX) { 2811 /* compute rownz_max for preallocation */ 2812 for (imdex=0; imdex<nrecvs; imdex++) { 2813 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2814 rptr = rbuf_j[imdex]; 2815 for (i=0; i<j; i++) { 2816 ncols = rptr[i+1] - rptr[i]; 2817 if (rownz_max < ncols) rownz_max = ncols; 2818 } 2819 } 2820 2821 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2822 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2823 /* ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DETERMINE,mat->cmap->N);CHKERRQ(ierr); -- give incorrect local column! */ 2824 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2825 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2826 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2827 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2828 } else { 2829 C = *matredundant; 2830 } 2831 2832 /* insert local matrix entries */ 2833 rptr = sbuf_j; 2834 cols = sbuf_j + rend-rstart + 1; 2835 vals = sbuf_a; 2836 for (i=0; i<rend-rstart; i++) { 2837 row = i + rstart; 2838 ncols = rptr[i+1] - rptr[i]; 2839 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2840 vals += ncols; 2841 cols += ncols; 2842 } 2843 /* insert received matrix entries */ 2844 for (imdex=0; imdex<nrecvs; imdex++) { 2845 rstart = rowrange[recv_rank[imdex]]; 2846 rend = rowrange[recv_rank[imdex]+1]; 2847 rptr = rbuf_j[imdex]; 2848 cols = rbuf_j[imdex] + rend-rstart + 1; 2849 vals = rbuf_a[imdex]; 2850 for (i=0; i<rend-rstart; i++) { 2851 row = i + rstart; 2852 ncols = rptr[i+1] - rptr[i]; 2853 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2854 vals += ncols; 2855 cols += ncols; 2856 } 2857 } 2858 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2859 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2860 2861 if (reuse == MAT_INITIAL_MATRIX) { 2862 *matredundant = C; 2863 2864 /* create a supporting struct and attach it to C for reuse */ 2865 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2866 if (subsize == 1) { 2867 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2868 c->redundant = redund; 2869 } else { 2870 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2871 c->redundant = redund; 2872 } 2873 2874 redund->nzlocal = nzlocal; 2875 redund->nsends = nsends; 2876 redund->nrecvs = nrecvs; 2877 redund->send_rank = send_rank; 2878 redund->recv_rank = recv_rank; 2879 redund->sbuf_nz = sbuf_nz; 2880 redund->rbuf_nz = rbuf_nz; 2881 redund->sbuf_j = sbuf_j; 2882 redund->sbuf_a = sbuf_a; 2883 redund->rbuf_j = rbuf_j; 2884 redund->rbuf_a = rbuf_a; 2885 2886 redund->Destroy = C->ops->destroy; 2887 C->ops->destroy = MatDestroy_MatRedundant; 2888 } 2889 PetscFunctionReturn(0); 2890 } 2891 2892 #undef __FUNCT__ 2893 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2894 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2895 { 2896 PetscErrorCode ierr; 2897 MPI_Comm comm; 2898 PetscMPIInt size; 2899 2900 PetscFunctionBegin; 2901 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2902 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2903 /* Only MatGetRedundantMatrix_MPIAIJ_interlaced() is written now */ 2904 if (!subcomm || nsubcomm == size) { /* create psubcomm */ 2905 PetscSubcomm psubcomm; 2906 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2907 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2908 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_INTERLACED);CHKERRQ(ierr); 2909 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr); 2910 /* destroy psubcomm? */ 2911 } else { 2912 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet"); 2913 } 2914 PetscFunctionReturn(0); 2915 } 2916 2917 #undef __FUNCT__ 2918 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2919 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2920 { 2921 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2922 PetscErrorCode ierr; 2923 PetscInt i,*idxb = 0; 2924 PetscScalar *va,*vb; 2925 Vec vtmp; 2926 2927 PetscFunctionBegin; 2928 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2929 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2930 if (idx) { 2931 for (i=0; i<A->rmap->n; i++) { 2932 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2933 } 2934 } 2935 2936 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2937 if (idx) { 2938 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2939 } 2940 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2941 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2942 2943 for (i=0; i<A->rmap->n; i++) { 2944 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2945 va[i] = vb[i]; 2946 if (idx) idx[i] = a->garray[idxb[i]]; 2947 } 2948 } 2949 2950 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2951 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2952 ierr = PetscFree(idxb);CHKERRQ(ierr); 2953 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2954 PetscFunctionReturn(0); 2955 } 2956 2957 #undef __FUNCT__ 2958 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2959 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2960 { 2961 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2962 PetscErrorCode ierr; 2963 PetscInt i,*idxb = 0; 2964 PetscScalar *va,*vb; 2965 Vec vtmp; 2966 2967 PetscFunctionBegin; 2968 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2969 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2970 if (idx) { 2971 for (i=0; i<A->cmap->n; i++) { 2972 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2973 } 2974 } 2975 2976 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2977 if (idx) { 2978 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2979 } 2980 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2981 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2982 2983 for (i=0; i<A->rmap->n; i++) { 2984 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2985 va[i] = vb[i]; 2986 if (idx) idx[i] = a->garray[idxb[i]]; 2987 } 2988 } 2989 2990 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2991 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2992 ierr = PetscFree(idxb);CHKERRQ(ierr); 2993 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2994 PetscFunctionReturn(0); 2995 } 2996 2997 #undef __FUNCT__ 2998 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2999 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3000 { 3001 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3002 PetscInt n = A->rmap->n; 3003 PetscInt cstart = A->cmap->rstart; 3004 PetscInt *cmap = mat->garray; 3005 PetscInt *diagIdx, *offdiagIdx; 3006 Vec diagV, offdiagV; 3007 PetscScalar *a, *diagA, *offdiagA; 3008 PetscInt r; 3009 PetscErrorCode ierr; 3010 3011 PetscFunctionBegin; 3012 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3013 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 3014 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 3015 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3016 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3017 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3018 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3019 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3020 for (r = 0; r < n; ++r) { 3021 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3022 a[r] = diagA[r]; 3023 idx[r] = cstart + diagIdx[r]; 3024 } else { 3025 a[r] = offdiagA[r]; 3026 idx[r] = cmap[offdiagIdx[r]]; 3027 } 3028 } 3029 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3030 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3031 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3032 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3033 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3034 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3035 PetscFunctionReturn(0); 3036 } 3037 3038 #undef __FUNCT__ 3039 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3040 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3041 { 3042 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3043 PetscInt n = A->rmap->n; 3044 PetscInt cstart = A->cmap->rstart; 3045 PetscInt *cmap = mat->garray; 3046 PetscInt *diagIdx, *offdiagIdx; 3047 Vec diagV, offdiagV; 3048 PetscScalar *a, *diagA, *offdiagA; 3049 PetscInt r; 3050 PetscErrorCode ierr; 3051 3052 PetscFunctionBegin; 3053 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3054 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3055 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3056 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3057 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3058 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3059 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3060 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3061 for (r = 0; r < n; ++r) { 3062 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3063 a[r] = diagA[r]; 3064 idx[r] = cstart + diagIdx[r]; 3065 } else { 3066 a[r] = offdiagA[r]; 3067 idx[r] = cmap[offdiagIdx[r]]; 3068 } 3069 } 3070 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3071 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3072 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3073 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3074 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3075 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3076 PetscFunctionReturn(0); 3077 } 3078 3079 #undef __FUNCT__ 3080 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3081 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3082 { 3083 PetscErrorCode ierr; 3084 Mat *dummy; 3085 3086 PetscFunctionBegin; 3087 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3088 *newmat = *dummy; 3089 ierr = PetscFree(dummy);CHKERRQ(ierr); 3090 PetscFunctionReturn(0); 3091 } 3092 3093 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3094 3095 #undef __FUNCT__ 3096 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3097 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3098 { 3099 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3100 PetscErrorCode ierr; 3101 3102 PetscFunctionBegin; 3103 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3104 PetscFunctionReturn(0); 3105 } 3106 3107 #undef __FUNCT__ 3108 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3109 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3110 { 3111 PetscErrorCode ierr; 3112 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3113 3114 PetscFunctionBegin; 3115 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3116 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3117 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3118 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3119 PetscFunctionReturn(0); 3120 } 3121 3122 /* -------------------------------------------------------------------*/ 3123 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3124 MatGetRow_MPIAIJ, 3125 MatRestoreRow_MPIAIJ, 3126 MatMult_MPIAIJ, 3127 /* 4*/ MatMultAdd_MPIAIJ, 3128 MatMultTranspose_MPIAIJ, 3129 MatMultTransposeAdd_MPIAIJ, 3130 #if defined(PETSC_HAVE_PBGL) 3131 MatSolve_MPIAIJ, 3132 #else 3133 0, 3134 #endif 3135 0, 3136 0, 3137 /*10*/ 0, 3138 0, 3139 0, 3140 MatSOR_MPIAIJ, 3141 MatTranspose_MPIAIJ, 3142 /*15*/ MatGetInfo_MPIAIJ, 3143 MatEqual_MPIAIJ, 3144 MatGetDiagonal_MPIAIJ, 3145 MatDiagonalScale_MPIAIJ, 3146 MatNorm_MPIAIJ, 3147 /*20*/ MatAssemblyBegin_MPIAIJ, 3148 MatAssemblyEnd_MPIAIJ, 3149 MatSetOption_MPIAIJ, 3150 MatZeroEntries_MPIAIJ, 3151 /*24*/ MatZeroRows_MPIAIJ, 3152 0, 3153 #if defined(PETSC_HAVE_PBGL) 3154 0, 3155 #else 3156 0, 3157 #endif 3158 0, 3159 0, 3160 /*29*/ MatSetUp_MPIAIJ, 3161 #if defined(PETSC_HAVE_PBGL) 3162 0, 3163 #else 3164 0, 3165 #endif 3166 0, 3167 0, 3168 0, 3169 /*34*/ MatDuplicate_MPIAIJ, 3170 0, 3171 0, 3172 0, 3173 0, 3174 /*39*/ MatAXPY_MPIAIJ, 3175 MatGetSubMatrices_MPIAIJ, 3176 MatIncreaseOverlap_MPIAIJ, 3177 MatGetValues_MPIAIJ, 3178 MatCopy_MPIAIJ, 3179 /*44*/ MatGetRowMax_MPIAIJ, 3180 MatScale_MPIAIJ, 3181 0, 3182 0, 3183 MatZeroRowsColumns_MPIAIJ, 3184 /*49*/ MatSetRandom_MPIAIJ, 3185 0, 3186 0, 3187 0, 3188 0, 3189 /*54*/ MatFDColoringCreate_MPIAIJ, 3190 0, 3191 MatSetUnfactored_MPIAIJ, 3192 MatPermute_MPIAIJ, 3193 0, 3194 /*59*/ MatGetSubMatrix_MPIAIJ, 3195 MatDestroy_MPIAIJ, 3196 MatView_MPIAIJ, 3197 0, 3198 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3199 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3200 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3201 0, 3202 0, 3203 0, 3204 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3205 MatGetRowMinAbs_MPIAIJ, 3206 0, 3207 MatSetColoring_MPIAIJ, 3208 0, 3209 MatSetValuesAdifor_MPIAIJ, 3210 /*75*/ MatFDColoringApply_AIJ, 3211 0, 3212 0, 3213 0, 3214 MatFindZeroDiagonals_MPIAIJ, 3215 /*80*/ 0, 3216 0, 3217 0, 3218 /*83*/ MatLoad_MPIAIJ, 3219 0, 3220 0, 3221 0, 3222 0, 3223 0, 3224 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3225 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3226 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3227 MatPtAP_MPIAIJ_MPIAIJ, 3228 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3229 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3230 0, 3231 0, 3232 0, 3233 0, 3234 /*99*/ 0, 3235 0, 3236 0, 3237 MatConjugate_MPIAIJ, 3238 0, 3239 /*104*/MatSetValuesRow_MPIAIJ, 3240 MatRealPart_MPIAIJ, 3241 MatImaginaryPart_MPIAIJ, 3242 0, 3243 0, 3244 /*109*/0, 3245 MatGetRedundantMatrix_MPIAIJ, 3246 MatGetRowMin_MPIAIJ, 3247 0, 3248 0, 3249 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3250 0, 3251 0, 3252 0, 3253 0, 3254 /*119*/0, 3255 0, 3256 0, 3257 0, 3258 MatGetMultiProcBlock_MPIAIJ, 3259 /*124*/MatFindNonzeroRows_MPIAIJ, 3260 MatGetColumnNorms_MPIAIJ, 3261 MatInvertBlockDiagonal_MPIAIJ, 3262 0, 3263 MatGetSubMatricesParallel_MPIAIJ, 3264 /*129*/0, 3265 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3266 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3267 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3268 0, 3269 /*134*/0, 3270 0, 3271 0, 3272 0, 3273 0, 3274 /*139*/0, 3275 0 3276 }; 3277 3278 /* ----------------------------------------------------------------------------------------*/ 3279 3280 #undef __FUNCT__ 3281 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3282 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3283 { 3284 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3285 PetscErrorCode ierr; 3286 3287 PetscFunctionBegin; 3288 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3289 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3290 PetscFunctionReturn(0); 3291 } 3292 3293 #undef __FUNCT__ 3294 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3295 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3296 { 3297 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3298 PetscErrorCode ierr; 3299 3300 PetscFunctionBegin; 3301 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3302 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3303 PetscFunctionReturn(0); 3304 } 3305 3306 #undef __FUNCT__ 3307 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3308 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3309 { 3310 Mat_MPIAIJ *b; 3311 PetscErrorCode ierr; 3312 3313 PetscFunctionBegin; 3314 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3315 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3316 b = (Mat_MPIAIJ*)B->data; 3317 3318 if (!B->preallocated) { 3319 /* Explicitly create 2 MATSEQAIJ matrices. */ 3320 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3321 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3322 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3323 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3324 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3325 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3326 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3327 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3328 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3329 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3330 } 3331 3332 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3333 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3334 B->preallocated = PETSC_TRUE; 3335 PetscFunctionReturn(0); 3336 } 3337 3338 #undef __FUNCT__ 3339 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3340 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3341 { 3342 Mat mat; 3343 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3344 PetscErrorCode ierr; 3345 3346 PetscFunctionBegin; 3347 *newmat = 0; 3348 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3349 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3350 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3351 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3352 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3353 a = (Mat_MPIAIJ*)mat->data; 3354 3355 mat->factortype = matin->factortype; 3356 mat->rmap->bs = matin->rmap->bs; 3357 mat->cmap->bs = matin->cmap->bs; 3358 mat->assembled = PETSC_TRUE; 3359 mat->insertmode = NOT_SET_VALUES; 3360 mat->preallocated = PETSC_TRUE; 3361 3362 a->size = oldmat->size; 3363 a->rank = oldmat->rank; 3364 a->donotstash = oldmat->donotstash; 3365 a->roworiented = oldmat->roworiented; 3366 a->rowindices = 0; 3367 a->rowvalues = 0; 3368 a->getrowactive = PETSC_FALSE; 3369 3370 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3371 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3372 3373 if (oldmat->colmap) { 3374 #if defined(PETSC_USE_CTABLE) 3375 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3376 #else 3377 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3378 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3379 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3380 #endif 3381 } else a->colmap = 0; 3382 if (oldmat->garray) { 3383 PetscInt len; 3384 len = oldmat->B->cmap->n; 3385 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3386 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3387 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3388 } else a->garray = 0; 3389 3390 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3391 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3392 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3393 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3394 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3395 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3396 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3397 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3398 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3399 *newmat = mat; 3400 PetscFunctionReturn(0); 3401 } 3402 3403 3404 3405 #undef __FUNCT__ 3406 #define __FUNCT__ "MatLoad_MPIAIJ" 3407 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3408 { 3409 PetscScalar *vals,*svals; 3410 MPI_Comm comm; 3411 PetscErrorCode ierr; 3412 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3413 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3414 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3415 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3416 PetscInt cend,cstart,n,*rowners,sizesset=1; 3417 int fd; 3418 PetscInt bs = 1; 3419 3420 PetscFunctionBegin; 3421 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3422 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3423 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3424 if (!rank) { 3425 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3426 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3427 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3428 } 3429 3430 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3431 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3432 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3433 3434 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3435 3436 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3437 M = header[1]; N = header[2]; 3438 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3439 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3440 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3441 3442 /* If global sizes are set, check if they are consistent with that given in the file */ 3443 if (sizesset) { 3444 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3445 } 3446 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3447 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3448 3449 /* determine ownership of all (block) rows */ 3450 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3451 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3452 else m = newMat->rmap->n; /* Set by user */ 3453 3454 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3455 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3456 3457 /* First process needs enough room for process with most rows */ 3458 if (!rank) { 3459 mmax = rowners[1]; 3460 for (i=2; i<=size; i++) { 3461 mmax = PetscMax(mmax, rowners[i]); 3462 } 3463 } else mmax = -1; /* unused, but compilers complain */ 3464 3465 rowners[0] = 0; 3466 for (i=2; i<=size; i++) { 3467 rowners[i] += rowners[i-1]; 3468 } 3469 rstart = rowners[rank]; 3470 rend = rowners[rank+1]; 3471 3472 /* distribute row lengths to all processors */ 3473 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3474 if (!rank) { 3475 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3476 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3477 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3478 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3479 for (j=0; j<m; j++) { 3480 procsnz[0] += ourlens[j]; 3481 } 3482 for (i=1; i<size; i++) { 3483 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3484 /* calculate the number of nonzeros on each processor */ 3485 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3486 procsnz[i] += rowlengths[j]; 3487 } 3488 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3489 } 3490 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3491 } else { 3492 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3493 } 3494 3495 if (!rank) { 3496 /* determine max buffer needed and allocate it */ 3497 maxnz = 0; 3498 for (i=0; i<size; i++) { 3499 maxnz = PetscMax(maxnz,procsnz[i]); 3500 } 3501 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3502 3503 /* read in my part of the matrix column indices */ 3504 nz = procsnz[0]; 3505 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3506 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3507 3508 /* read in every one elses and ship off */ 3509 for (i=1; i<size; i++) { 3510 nz = procsnz[i]; 3511 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3512 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3513 } 3514 ierr = PetscFree(cols);CHKERRQ(ierr); 3515 } else { 3516 /* determine buffer space needed for message */ 3517 nz = 0; 3518 for (i=0; i<m; i++) { 3519 nz += ourlens[i]; 3520 } 3521 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3522 3523 /* receive message of column indices*/ 3524 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3525 } 3526 3527 /* determine column ownership if matrix is not square */ 3528 if (N != M) { 3529 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3530 else n = newMat->cmap->n; 3531 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3532 cstart = cend - n; 3533 } else { 3534 cstart = rstart; 3535 cend = rend; 3536 n = cend - cstart; 3537 } 3538 3539 /* loop over local rows, determining number of off diagonal entries */ 3540 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3541 jj = 0; 3542 for (i=0; i<m; i++) { 3543 for (j=0; j<ourlens[i]; j++) { 3544 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3545 jj++; 3546 } 3547 } 3548 3549 for (i=0; i<m; i++) { 3550 ourlens[i] -= offlens[i]; 3551 } 3552 if (!sizesset) { 3553 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3554 } 3555 3556 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3557 3558 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3559 3560 for (i=0; i<m; i++) { 3561 ourlens[i] += offlens[i]; 3562 } 3563 3564 if (!rank) { 3565 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3566 3567 /* read in my part of the matrix numerical values */ 3568 nz = procsnz[0]; 3569 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3570 3571 /* insert into matrix */ 3572 jj = rstart; 3573 smycols = mycols; 3574 svals = vals; 3575 for (i=0; i<m; i++) { 3576 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3577 smycols += ourlens[i]; 3578 svals += ourlens[i]; 3579 jj++; 3580 } 3581 3582 /* read in other processors and ship out */ 3583 for (i=1; i<size; i++) { 3584 nz = procsnz[i]; 3585 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3586 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3587 } 3588 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3589 } else { 3590 /* receive numeric values */ 3591 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3592 3593 /* receive message of values*/ 3594 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3595 3596 /* insert into matrix */ 3597 jj = rstart; 3598 smycols = mycols; 3599 svals = vals; 3600 for (i=0; i<m; i++) { 3601 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3602 smycols += ourlens[i]; 3603 svals += ourlens[i]; 3604 jj++; 3605 } 3606 } 3607 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3608 ierr = PetscFree(vals);CHKERRQ(ierr); 3609 ierr = PetscFree(mycols);CHKERRQ(ierr); 3610 ierr = PetscFree(rowners);CHKERRQ(ierr); 3611 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3612 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3613 PetscFunctionReturn(0); 3614 } 3615 3616 #undef __FUNCT__ 3617 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3618 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3619 { 3620 PetscErrorCode ierr; 3621 IS iscol_local; 3622 PetscInt csize; 3623 3624 PetscFunctionBegin; 3625 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3626 if (call == MAT_REUSE_MATRIX) { 3627 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3628 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3629 } else { 3630 PetscInt cbs; 3631 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3632 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3633 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3634 } 3635 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3636 if (call == MAT_INITIAL_MATRIX) { 3637 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3638 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3639 } 3640 PetscFunctionReturn(0); 3641 } 3642 3643 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3644 #undef __FUNCT__ 3645 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3646 /* 3647 Not great since it makes two copies of the submatrix, first an SeqAIJ 3648 in local and then by concatenating the local matrices the end result. 3649 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3650 3651 Note: This requires a sequential iscol with all indices. 3652 */ 3653 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3654 { 3655 PetscErrorCode ierr; 3656 PetscMPIInt rank,size; 3657 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3658 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3659 PetscBool allcolumns, colflag; 3660 Mat M,Mreuse; 3661 MatScalar *vwork,*aa; 3662 MPI_Comm comm; 3663 Mat_SeqAIJ *aij; 3664 3665 PetscFunctionBegin; 3666 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3667 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3668 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3669 3670 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3671 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3672 if (colflag && ncol == mat->cmap->N) { 3673 allcolumns = PETSC_TRUE; 3674 } else { 3675 allcolumns = PETSC_FALSE; 3676 } 3677 if (call == MAT_REUSE_MATRIX) { 3678 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3679 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3680 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3681 } else { 3682 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3683 } 3684 3685 /* 3686 m - number of local rows 3687 n - number of columns (same on all processors) 3688 rstart - first row in new global matrix generated 3689 */ 3690 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3691 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3692 if (call == MAT_INITIAL_MATRIX) { 3693 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3694 ii = aij->i; 3695 jj = aij->j; 3696 3697 /* 3698 Determine the number of non-zeros in the diagonal and off-diagonal 3699 portions of the matrix in order to do correct preallocation 3700 */ 3701 3702 /* first get start and end of "diagonal" columns */ 3703 if (csize == PETSC_DECIDE) { 3704 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3705 if (mglobal == n) { /* square matrix */ 3706 nlocal = m; 3707 } else { 3708 nlocal = n/size + ((n % size) > rank); 3709 } 3710 } else { 3711 nlocal = csize; 3712 } 3713 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3714 rstart = rend - nlocal; 3715 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3716 3717 /* next, compute all the lengths */ 3718 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3719 olens = dlens + m; 3720 for (i=0; i<m; i++) { 3721 jend = ii[i+1] - ii[i]; 3722 olen = 0; 3723 dlen = 0; 3724 for (j=0; j<jend; j++) { 3725 if (*jj < rstart || *jj >= rend) olen++; 3726 else dlen++; 3727 jj++; 3728 } 3729 olens[i] = olen; 3730 dlens[i] = dlen; 3731 } 3732 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3733 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3734 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3735 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3736 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3737 ierr = PetscFree(dlens);CHKERRQ(ierr); 3738 } else { 3739 PetscInt ml,nl; 3740 3741 M = *newmat; 3742 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3743 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3744 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3745 /* 3746 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3747 rather than the slower MatSetValues(). 3748 */ 3749 M->was_assembled = PETSC_TRUE; 3750 M->assembled = PETSC_FALSE; 3751 } 3752 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3753 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3754 ii = aij->i; 3755 jj = aij->j; 3756 aa = aij->a; 3757 for (i=0; i<m; i++) { 3758 row = rstart + i; 3759 nz = ii[i+1] - ii[i]; 3760 cwork = jj; jj += nz; 3761 vwork = aa; aa += nz; 3762 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3763 } 3764 3765 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3766 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3767 *newmat = M; 3768 3769 /* save submatrix used in processor for next request */ 3770 if (call == MAT_INITIAL_MATRIX) { 3771 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3772 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3773 } 3774 PetscFunctionReturn(0); 3775 } 3776 3777 #undef __FUNCT__ 3778 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3779 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3780 { 3781 PetscInt m,cstart, cend,j,nnz,i,d; 3782 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3783 const PetscInt *JJ; 3784 PetscScalar *values; 3785 PetscErrorCode ierr; 3786 3787 PetscFunctionBegin; 3788 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3789 3790 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3791 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3792 m = B->rmap->n; 3793 cstart = B->cmap->rstart; 3794 cend = B->cmap->rend; 3795 rstart = B->rmap->rstart; 3796 3797 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3798 3799 #if defined(PETSC_USE_DEBUGGING) 3800 for (i=0; i<m; i++) { 3801 nnz = Ii[i+1]- Ii[i]; 3802 JJ = J + Ii[i]; 3803 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3804 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3805 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3806 } 3807 #endif 3808 3809 for (i=0; i<m; i++) { 3810 nnz = Ii[i+1]- Ii[i]; 3811 JJ = J + Ii[i]; 3812 nnz_max = PetscMax(nnz_max,nnz); 3813 d = 0; 3814 for (j=0; j<nnz; j++) { 3815 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3816 } 3817 d_nnz[i] = d; 3818 o_nnz[i] = nnz - d; 3819 } 3820 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3821 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3822 3823 if (v) values = (PetscScalar*)v; 3824 else { 3825 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3826 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3827 } 3828 3829 for (i=0; i<m; i++) { 3830 ii = i + rstart; 3831 nnz = Ii[i+1]- Ii[i]; 3832 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3833 } 3834 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3835 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3836 3837 if (!v) { 3838 ierr = PetscFree(values);CHKERRQ(ierr); 3839 } 3840 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3841 PetscFunctionReturn(0); 3842 } 3843 3844 #undef __FUNCT__ 3845 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3846 /*@ 3847 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3848 (the default parallel PETSc format). 3849 3850 Collective on MPI_Comm 3851 3852 Input Parameters: 3853 + B - the matrix 3854 . i - the indices into j for the start of each local row (starts with zero) 3855 . j - the column indices for each local row (starts with zero) 3856 - v - optional values in the matrix 3857 3858 Level: developer 3859 3860 Notes: 3861 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3862 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3863 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3864 3865 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3866 3867 The format which is used for the sparse matrix input, is equivalent to a 3868 row-major ordering.. i.e for the following matrix, the input data expected is 3869 as shown: 3870 3871 1 0 0 3872 2 0 3 P0 3873 ------- 3874 4 5 6 P1 3875 3876 Process0 [P0]: rows_owned=[0,1] 3877 i = {0,1,3} [size = nrow+1 = 2+1] 3878 j = {0,0,2} [size = nz = 6] 3879 v = {1,2,3} [size = nz = 6] 3880 3881 Process1 [P1]: rows_owned=[2] 3882 i = {0,3} [size = nrow+1 = 1+1] 3883 j = {0,1,2} [size = nz = 6] 3884 v = {4,5,6} [size = nz = 6] 3885 3886 .keywords: matrix, aij, compressed row, sparse, parallel 3887 3888 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3889 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3890 @*/ 3891 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3892 { 3893 PetscErrorCode ierr; 3894 3895 PetscFunctionBegin; 3896 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3897 PetscFunctionReturn(0); 3898 } 3899 3900 #undef __FUNCT__ 3901 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3902 /*@C 3903 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3904 (the default parallel PETSc format). For good matrix assembly performance 3905 the user should preallocate the matrix storage by setting the parameters 3906 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3907 performance can be increased by more than a factor of 50. 3908 3909 Collective on MPI_Comm 3910 3911 Input Parameters: 3912 + A - the matrix 3913 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3914 (same value is used for all local rows) 3915 . d_nnz - array containing the number of nonzeros in the various rows of the 3916 DIAGONAL portion of the local submatrix (possibly different for each row) 3917 or NULL, if d_nz is used to specify the nonzero structure. 3918 The size of this array is equal to the number of local rows, i.e 'm'. 3919 For matrices that will be factored, you must leave room for (and set) 3920 the diagonal entry even if it is zero. 3921 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3922 submatrix (same value is used for all local rows). 3923 - o_nnz - array containing the number of nonzeros in the various rows of the 3924 OFF-DIAGONAL portion of the local submatrix (possibly different for 3925 each row) or NULL, if o_nz is used to specify the nonzero 3926 structure. The size of this array is equal to the number 3927 of local rows, i.e 'm'. 3928 3929 If the *_nnz parameter is given then the *_nz parameter is ignored 3930 3931 The AIJ format (also called the Yale sparse matrix format or 3932 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3933 storage. The stored row and column indices begin with zero. 3934 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3935 3936 The parallel matrix is partitioned such that the first m0 rows belong to 3937 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3938 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3939 3940 The DIAGONAL portion of the local submatrix of a processor can be defined 3941 as the submatrix which is obtained by extraction the part corresponding to 3942 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3943 first row that belongs to the processor, r2 is the last row belonging to 3944 the this processor, and c1-c2 is range of indices of the local part of a 3945 vector suitable for applying the matrix to. This is an mxn matrix. In the 3946 common case of a square matrix, the row and column ranges are the same and 3947 the DIAGONAL part is also square. The remaining portion of the local 3948 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3949 3950 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3951 3952 You can call MatGetInfo() to get information on how effective the preallocation was; 3953 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3954 You can also run with the option -info and look for messages with the string 3955 malloc in them to see if additional memory allocation was needed. 3956 3957 Example usage: 3958 3959 Consider the following 8x8 matrix with 34 non-zero values, that is 3960 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3961 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3962 as follows: 3963 3964 .vb 3965 1 2 0 | 0 3 0 | 0 4 3966 Proc0 0 5 6 | 7 0 0 | 8 0 3967 9 0 10 | 11 0 0 | 12 0 3968 ------------------------------------- 3969 13 0 14 | 15 16 17 | 0 0 3970 Proc1 0 18 0 | 19 20 21 | 0 0 3971 0 0 0 | 22 23 0 | 24 0 3972 ------------------------------------- 3973 Proc2 25 26 27 | 0 0 28 | 29 0 3974 30 0 0 | 31 32 33 | 0 34 3975 .ve 3976 3977 This can be represented as a collection of submatrices as: 3978 3979 .vb 3980 A B C 3981 D E F 3982 G H I 3983 .ve 3984 3985 Where the submatrices A,B,C are owned by proc0, D,E,F are 3986 owned by proc1, G,H,I are owned by proc2. 3987 3988 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3989 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3990 The 'M','N' parameters are 8,8, and have the same values on all procs. 3991 3992 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3993 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3994 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3995 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3996 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3997 matrix, ans [DF] as another SeqAIJ matrix. 3998 3999 When d_nz, o_nz parameters are specified, d_nz storage elements are 4000 allocated for every row of the local diagonal submatrix, and o_nz 4001 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4002 One way to choose d_nz and o_nz is to use the max nonzerors per local 4003 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4004 In this case, the values of d_nz,o_nz are: 4005 .vb 4006 proc0 : dnz = 2, o_nz = 2 4007 proc1 : dnz = 3, o_nz = 2 4008 proc2 : dnz = 1, o_nz = 4 4009 .ve 4010 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4011 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4012 for proc3. i.e we are using 12+15+10=37 storage locations to store 4013 34 values. 4014 4015 When d_nnz, o_nnz parameters are specified, the storage is specified 4016 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4017 In the above case the values for d_nnz,o_nnz are: 4018 .vb 4019 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4020 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4021 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4022 .ve 4023 Here the space allocated is sum of all the above values i.e 34, and 4024 hence pre-allocation is perfect. 4025 4026 Level: intermediate 4027 4028 .keywords: matrix, aij, compressed row, sparse, parallel 4029 4030 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4031 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4032 @*/ 4033 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4034 { 4035 PetscErrorCode ierr; 4036 4037 PetscFunctionBegin; 4038 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4039 PetscValidType(B,1); 4040 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4041 PetscFunctionReturn(0); 4042 } 4043 4044 #undef __FUNCT__ 4045 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4046 /*@ 4047 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4048 CSR format the local rows. 4049 4050 Collective on MPI_Comm 4051 4052 Input Parameters: 4053 + comm - MPI communicator 4054 . m - number of local rows (Cannot be PETSC_DECIDE) 4055 . n - This value should be the same as the local size used in creating the 4056 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4057 calculated if N is given) For square matrices n is almost always m. 4058 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4059 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4060 . i - row indices 4061 . j - column indices 4062 - a - matrix values 4063 4064 Output Parameter: 4065 . mat - the matrix 4066 4067 Level: intermediate 4068 4069 Notes: 4070 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4071 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4072 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4073 4074 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4075 4076 The format which is used for the sparse matrix input, is equivalent to a 4077 row-major ordering.. i.e for the following matrix, the input data expected is 4078 as shown: 4079 4080 1 0 0 4081 2 0 3 P0 4082 ------- 4083 4 5 6 P1 4084 4085 Process0 [P0]: rows_owned=[0,1] 4086 i = {0,1,3} [size = nrow+1 = 2+1] 4087 j = {0,0,2} [size = nz = 6] 4088 v = {1,2,3} [size = nz = 6] 4089 4090 Process1 [P1]: rows_owned=[2] 4091 i = {0,3} [size = nrow+1 = 1+1] 4092 j = {0,1,2} [size = nz = 6] 4093 v = {4,5,6} [size = nz = 6] 4094 4095 .keywords: matrix, aij, compressed row, sparse, parallel 4096 4097 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4098 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4099 @*/ 4100 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4101 { 4102 PetscErrorCode ierr; 4103 4104 PetscFunctionBegin; 4105 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4106 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4107 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4108 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4109 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4110 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4111 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4112 PetscFunctionReturn(0); 4113 } 4114 4115 #undef __FUNCT__ 4116 #define __FUNCT__ "MatCreateAIJ" 4117 /*@C 4118 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4119 (the default parallel PETSc format). For good matrix assembly performance 4120 the user should preallocate the matrix storage by setting the parameters 4121 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4122 performance can be increased by more than a factor of 50. 4123 4124 Collective on MPI_Comm 4125 4126 Input Parameters: 4127 + comm - MPI communicator 4128 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4129 This value should be the same as the local size used in creating the 4130 y vector for the matrix-vector product y = Ax. 4131 . n - This value should be the same as the local size used in creating the 4132 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4133 calculated if N is given) For square matrices n is almost always m. 4134 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4135 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4136 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4137 (same value is used for all local rows) 4138 . d_nnz - array containing the number of nonzeros in the various rows of the 4139 DIAGONAL portion of the local submatrix (possibly different for each row) 4140 or NULL, if d_nz is used to specify the nonzero structure. 4141 The size of this array is equal to the number of local rows, i.e 'm'. 4142 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4143 submatrix (same value is used for all local rows). 4144 - o_nnz - array containing the number of nonzeros in the various rows of the 4145 OFF-DIAGONAL portion of the local submatrix (possibly different for 4146 each row) or NULL, if o_nz is used to specify the nonzero 4147 structure. The size of this array is equal to the number 4148 of local rows, i.e 'm'. 4149 4150 Output Parameter: 4151 . A - the matrix 4152 4153 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4154 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4155 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4156 4157 Notes: 4158 If the *_nnz parameter is given then the *_nz parameter is ignored 4159 4160 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4161 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4162 storage requirements for this matrix. 4163 4164 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4165 processor than it must be used on all processors that share the object for 4166 that argument. 4167 4168 The user MUST specify either the local or global matrix dimensions 4169 (possibly both). 4170 4171 The parallel matrix is partitioned across processors such that the 4172 first m0 rows belong to process 0, the next m1 rows belong to 4173 process 1, the next m2 rows belong to process 2 etc.. where 4174 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4175 values corresponding to [m x N] submatrix. 4176 4177 The columns are logically partitioned with the n0 columns belonging 4178 to 0th partition, the next n1 columns belonging to the next 4179 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4180 4181 The DIAGONAL portion of the local submatrix on any given processor 4182 is the submatrix corresponding to the rows and columns m,n 4183 corresponding to the given processor. i.e diagonal matrix on 4184 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4185 etc. The remaining portion of the local submatrix [m x (N-n)] 4186 constitute the OFF-DIAGONAL portion. The example below better 4187 illustrates this concept. 4188 4189 For a square global matrix we define each processor's diagonal portion 4190 to be its local rows and the corresponding columns (a square submatrix); 4191 each processor's off-diagonal portion encompasses the remainder of the 4192 local matrix (a rectangular submatrix). 4193 4194 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4195 4196 When calling this routine with a single process communicator, a matrix of 4197 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4198 type of communicator, use the construction mechanism: 4199 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4200 4201 By default, this format uses inodes (identical nodes) when possible. 4202 We search for consecutive rows with the same nonzero structure, thereby 4203 reusing matrix information to achieve increased efficiency. 4204 4205 Options Database Keys: 4206 + -mat_no_inode - Do not use inodes 4207 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4208 - -mat_aij_oneindex - Internally use indexing starting at 1 4209 rather than 0. Note that when calling MatSetValues(), 4210 the user still MUST index entries starting at 0! 4211 4212 4213 Example usage: 4214 4215 Consider the following 8x8 matrix with 34 non-zero values, that is 4216 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4217 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4218 as follows: 4219 4220 .vb 4221 1 2 0 | 0 3 0 | 0 4 4222 Proc0 0 5 6 | 7 0 0 | 8 0 4223 9 0 10 | 11 0 0 | 12 0 4224 ------------------------------------- 4225 13 0 14 | 15 16 17 | 0 0 4226 Proc1 0 18 0 | 19 20 21 | 0 0 4227 0 0 0 | 22 23 0 | 24 0 4228 ------------------------------------- 4229 Proc2 25 26 27 | 0 0 28 | 29 0 4230 30 0 0 | 31 32 33 | 0 34 4231 .ve 4232 4233 This can be represented as a collection of submatrices as: 4234 4235 .vb 4236 A B C 4237 D E F 4238 G H I 4239 .ve 4240 4241 Where the submatrices A,B,C are owned by proc0, D,E,F are 4242 owned by proc1, G,H,I are owned by proc2. 4243 4244 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4245 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4246 The 'M','N' parameters are 8,8, and have the same values on all procs. 4247 4248 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4249 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4250 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4251 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4252 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4253 matrix, ans [DF] as another SeqAIJ matrix. 4254 4255 When d_nz, o_nz parameters are specified, d_nz storage elements are 4256 allocated for every row of the local diagonal submatrix, and o_nz 4257 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4258 One way to choose d_nz and o_nz is to use the max nonzerors per local 4259 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4260 In this case, the values of d_nz,o_nz are: 4261 .vb 4262 proc0 : dnz = 2, o_nz = 2 4263 proc1 : dnz = 3, o_nz = 2 4264 proc2 : dnz = 1, o_nz = 4 4265 .ve 4266 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4267 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4268 for proc3. i.e we are using 12+15+10=37 storage locations to store 4269 34 values. 4270 4271 When d_nnz, o_nnz parameters are specified, the storage is specified 4272 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4273 In the above case the values for d_nnz,o_nnz are: 4274 .vb 4275 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4276 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4277 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4278 .ve 4279 Here the space allocated is sum of all the above values i.e 34, and 4280 hence pre-allocation is perfect. 4281 4282 Level: intermediate 4283 4284 .keywords: matrix, aij, compressed row, sparse, parallel 4285 4286 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4287 MPIAIJ, MatCreateMPIAIJWithArrays() 4288 @*/ 4289 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4290 { 4291 PetscErrorCode ierr; 4292 PetscMPIInt size; 4293 4294 PetscFunctionBegin; 4295 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4296 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4297 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4298 if (size > 1) { 4299 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4300 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4301 } else { 4302 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4303 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4304 } 4305 PetscFunctionReturn(0); 4306 } 4307 4308 #undef __FUNCT__ 4309 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4310 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4311 { 4312 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4313 4314 PetscFunctionBegin; 4315 *Ad = a->A; 4316 *Ao = a->B; 4317 *colmap = a->garray; 4318 PetscFunctionReturn(0); 4319 } 4320 4321 #undef __FUNCT__ 4322 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4323 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4324 { 4325 PetscErrorCode ierr; 4326 PetscInt i; 4327 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4328 4329 PetscFunctionBegin; 4330 if (coloring->ctype == IS_COLORING_GLOBAL) { 4331 ISColoringValue *allcolors,*colors; 4332 ISColoring ocoloring; 4333 4334 /* set coloring for diagonal portion */ 4335 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4336 4337 /* set coloring for off-diagonal portion */ 4338 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4339 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4340 for (i=0; i<a->B->cmap->n; i++) { 4341 colors[i] = allcolors[a->garray[i]]; 4342 } 4343 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4344 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4345 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4346 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4347 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4348 ISColoringValue *colors; 4349 PetscInt *larray; 4350 ISColoring ocoloring; 4351 4352 /* set coloring for diagonal portion */ 4353 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4354 for (i=0; i<a->A->cmap->n; i++) { 4355 larray[i] = i + A->cmap->rstart; 4356 } 4357 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4358 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4359 for (i=0; i<a->A->cmap->n; i++) { 4360 colors[i] = coloring->colors[larray[i]]; 4361 } 4362 ierr = PetscFree(larray);CHKERRQ(ierr); 4363 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4364 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4365 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4366 4367 /* set coloring for off-diagonal portion */ 4368 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4369 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4370 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4371 for (i=0; i<a->B->cmap->n; i++) { 4372 colors[i] = coloring->colors[larray[i]]; 4373 } 4374 ierr = PetscFree(larray);CHKERRQ(ierr); 4375 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4376 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4377 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4378 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4379 PetscFunctionReturn(0); 4380 } 4381 4382 #undef __FUNCT__ 4383 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4384 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4385 { 4386 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4387 PetscErrorCode ierr; 4388 4389 PetscFunctionBegin; 4390 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4391 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4392 PetscFunctionReturn(0); 4393 } 4394 4395 #undef __FUNCT__ 4396 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4397 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4398 { 4399 PetscErrorCode ierr; 4400 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4401 PetscInt *indx; 4402 4403 PetscFunctionBegin; 4404 /* This routine will ONLY return MPIAIJ type matrix */ 4405 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4406 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4407 if (n == PETSC_DECIDE) { 4408 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4409 } 4410 /* Check sum(n) = N */ 4411 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4412 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4413 4414 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4415 rstart -= m; 4416 4417 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4418 for (i=0; i<m; i++) { 4419 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4420 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4421 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4422 } 4423 4424 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4425 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4426 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4427 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4428 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4429 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4430 PetscFunctionReturn(0); 4431 } 4432 4433 #undef __FUNCT__ 4434 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4435 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4436 { 4437 PetscErrorCode ierr; 4438 PetscInt m,N,i,rstart,nnz,Ii; 4439 PetscInt *indx; 4440 PetscScalar *values; 4441 4442 PetscFunctionBegin; 4443 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4444 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4445 for (i=0; i<m; i++) { 4446 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4447 Ii = i + rstart; 4448 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4449 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4450 } 4451 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4452 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4453 PetscFunctionReturn(0); 4454 } 4455 4456 #undef __FUNCT__ 4457 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4458 /*@ 4459 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4460 matrices from each processor 4461 4462 Collective on MPI_Comm 4463 4464 Input Parameters: 4465 + comm - the communicators the parallel matrix will live on 4466 . inmat - the input sequential matrices 4467 . n - number of local columns (or PETSC_DECIDE) 4468 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4469 4470 Output Parameter: 4471 . outmat - the parallel matrix generated 4472 4473 Level: advanced 4474 4475 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4476 4477 @*/ 4478 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4479 { 4480 PetscErrorCode ierr; 4481 4482 PetscFunctionBegin; 4483 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4484 if (scall == MAT_INITIAL_MATRIX) { 4485 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4486 } 4487 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4488 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4489 PetscFunctionReturn(0); 4490 } 4491 4492 #undef __FUNCT__ 4493 #define __FUNCT__ "MatFileSplit" 4494 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4495 { 4496 PetscErrorCode ierr; 4497 PetscMPIInt rank; 4498 PetscInt m,N,i,rstart,nnz; 4499 size_t len; 4500 const PetscInt *indx; 4501 PetscViewer out; 4502 char *name; 4503 Mat B; 4504 const PetscScalar *values; 4505 4506 PetscFunctionBegin; 4507 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4508 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4509 /* Should this be the type of the diagonal block of A? */ 4510 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4511 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4512 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4513 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4514 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4515 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4516 for (i=0; i<m; i++) { 4517 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4518 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4519 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4520 } 4521 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4522 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4523 4524 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4525 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4526 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4527 sprintf(name,"%s.%d",outfile,rank); 4528 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4529 ierr = PetscFree(name);CHKERRQ(ierr); 4530 ierr = MatView(B,out);CHKERRQ(ierr); 4531 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4532 ierr = MatDestroy(&B);CHKERRQ(ierr); 4533 PetscFunctionReturn(0); 4534 } 4535 4536 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4537 #undef __FUNCT__ 4538 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4539 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4540 { 4541 PetscErrorCode ierr; 4542 Mat_Merge_SeqsToMPI *merge; 4543 PetscContainer container; 4544 4545 PetscFunctionBegin; 4546 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4547 if (container) { 4548 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4549 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4550 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4551 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4552 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4553 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4554 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4555 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4556 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4557 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4558 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4559 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4560 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4561 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4562 ierr = PetscFree(merge);CHKERRQ(ierr); 4563 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4564 } 4565 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4566 PetscFunctionReturn(0); 4567 } 4568 4569 #include <../src/mat/utils/freespace.h> 4570 #include <petscbt.h> 4571 4572 #undef __FUNCT__ 4573 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4574 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4575 { 4576 PetscErrorCode ierr; 4577 MPI_Comm comm; 4578 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4579 PetscMPIInt size,rank,taga,*len_s; 4580 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4581 PetscInt proc,m; 4582 PetscInt **buf_ri,**buf_rj; 4583 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4584 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4585 MPI_Request *s_waits,*r_waits; 4586 MPI_Status *status; 4587 MatScalar *aa=a->a; 4588 MatScalar **abuf_r,*ba_i; 4589 Mat_Merge_SeqsToMPI *merge; 4590 PetscContainer container; 4591 4592 PetscFunctionBegin; 4593 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4594 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4595 4596 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4597 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4598 4599 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4600 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4601 4602 bi = merge->bi; 4603 bj = merge->bj; 4604 buf_ri = merge->buf_ri; 4605 buf_rj = merge->buf_rj; 4606 4607 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4608 owners = merge->rowmap->range; 4609 len_s = merge->len_s; 4610 4611 /* send and recv matrix values */ 4612 /*-----------------------------*/ 4613 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4614 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4615 4616 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4617 for (proc=0,k=0; proc<size; proc++) { 4618 if (!len_s[proc]) continue; 4619 i = owners[proc]; 4620 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4621 k++; 4622 } 4623 4624 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4625 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4626 ierr = PetscFree(status);CHKERRQ(ierr); 4627 4628 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4629 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4630 4631 /* insert mat values of mpimat */ 4632 /*----------------------------*/ 4633 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4634 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4635 4636 for (k=0; k<merge->nrecv; k++) { 4637 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4638 nrows = *(buf_ri_k[k]); 4639 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4640 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4641 } 4642 4643 /* set values of ba */ 4644 m = merge->rowmap->n; 4645 for (i=0; i<m; i++) { 4646 arow = owners[rank] + i; 4647 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4648 bnzi = bi[i+1] - bi[i]; 4649 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4650 4651 /* add local non-zero vals of this proc's seqmat into ba */ 4652 anzi = ai[arow+1] - ai[arow]; 4653 aj = a->j + ai[arow]; 4654 aa = a->a + ai[arow]; 4655 nextaj = 0; 4656 for (j=0; nextaj<anzi; j++) { 4657 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4658 ba_i[j] += aa[nextaj++]; 4659 } 4660 } 4661 4662 /* add received vals into ba */ 4663 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4664 /* i-th row */ 4665 if (i == *nextrow[k]) { 4666 anzi = *(nextai[k]+1) - *nextai[k]; 4667 aj = buf_rj[k] + *(nextai[k]); 4668 aa = abuf_r[k] + *(nextai[k]); 4669 nextaj = 0; 4670 for (j=0; nextaj<anzi; j++) { 4671 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4672 ba_i[j] += aa[nextaj++]; 4673 } 4674 } 4675 nextrow[k]++; nextai[k]++; 4676 } 4677 } 4678 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4679 } 4680 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4681 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4682 4683 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4684 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4685 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4686 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4687 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4688 PetscFunctionReturn(0); 4689 } 4690 4691 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4692 4693 #undef __FUNCT__ 4694 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4695 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4696 { 4697 PetscErrorCode ierr; 4698 Mat B_mpi; 4699 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4700 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4701 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4702 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4703 PetscInt len,proc,*dnz,*onz,bs,cbs; 4704 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4705 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4706 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4707 MPI_Status *status; 4708 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4709 PetscBT lnkbt; 4710 Mat_Merge_SeqsToMPI *merge; 4711 PetscContainer container; 4712 4713 PetscFunctionBegin; 4714 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4715 4716 /* make sure it is a PETSc comm */ 4717 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4718 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4719 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4720 4721 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4722 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4723 4724 /* determine row ownership */ 4725 /*---------------------------------------------------------*/ 4726 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4727 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4728 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4729 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4730 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4731 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4732 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4733 4734 m = merge->rowmap->n; 4735 owners = merge->rowmap->range; 4736 4737 /* determine the number of messages to send, their lengths */ 4738 /*---------------------------------------------------------*/ 4739 len_s = merge->len_s; 4740 4741 len = 0; /* length of buf_si[] */ 4742 merge->nsend = 0; 4743 for (proc=0; proc<size; proc++) { 4744 len_si[proc] = 0; 4745 if (proc == rank) { 4746 len_s[proc] = 0; 4747 } else { 4748 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4749 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4750 } 4751 if (len_s[proc]) { 4752 merge->nsend++; 4753 nrows = 0; 4754 for (i=owners[proc]; i<owners[proc+1]; i++) { 4755 if (ai[i+1] > ai[i]) nrows++; 4756 } 4757 len_si[proc] = 2*(nrows+1); 4758 len += len_si[proc]; 4759 } 4760 } 4761 4762 /* determine the number and length of messages to receive for ij-structure */ 4763 /*-------------------------------------------------------------------------*/ 4764 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4765 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4766 4767 /* post the Irecv of j-structure */ 4768 /*-------------------------------*/ 4769 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4770 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4771 4772 /* post the Isend of j-structure */ 4773 /*--------------------------------*/ 4774 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4775 4776 for (proc=0, k=0; proc<size; proc++) { 4777 if (!len_s[proc]) continue; 4778 i = owners[proc]; 4779 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4780 k++; 4781 } 4782 4783 /* receives and sends of j-structure are complete */ 4784 /*------------------------------------------------*/ 4785 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4786 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4787 4788 /* send and recv i-structure */ 4789 /*---------------------------*/ 4790 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4791 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4792 4793 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4794 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4795 for (proc=0,k=0; proc<size; proc++) { 4796 if (!len_s[proc]) continue; 4797 /* form outgoing message for i-structure: 4798 buf_si[0]: nrows to be sent 4799 [1:nrows]: row index (global) 4800 [nrows+1:2*nrows+1]: i-structure index 4801 */ 4802 /*-------------------------------------------*/ 4803 nrows = len_si[proc]/2 - 1; 4804 buf_si_i = buf_si + nrows+1; 4805 buf_si[0] = nrows; 4806 buf_si_i[0] = 0; 4807 nrows = 0; 4808 for (i=owners[proc]; i<owners[proc+1]; i++) { 4809 anzi = ai[i+1] - ai[i]; 4810 if (anzi) { 4811 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4812 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4813 nrows++; 4814 } 4815 } 4816 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4817 k++; 4818 buf_si += len_si[proc]; 4819 } 4820 4821 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4822 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4823 4824 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4825 for (i=0; i<merge->nrecv; i++) { 4826 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4827 } 4828 4829 ierr = PetscFree(len_si);CHKERRQ(ierr); 4830 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4831 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4832 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4833 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4834 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4835 ierr = PetscFree(status);CHKERRQ(ierr); 4836 4837 /* compute a local seq matrix in each processor */ 4838 /*----------------------------------------------*/ 4839 /* allocate bi array and free space for accumulating nonzero column info */ 4840 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4841 bi[0] = 0; 4842 4843 /* create and initialize a linked list */ 4844 nlnk = N+1; 4845 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4846 4847 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4848 len = ai[owners[rank+1]] - ai[owners[rank]]; 4849 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4850 4851 current_space = free_space; 4852 4853 /* determine symbolic info for each local row */ 4854 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4855 4856 for (k=0; k<merge->nrecv; k++) { 4857 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4858 nrows = *buf_ri_k[k]; 4859 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4860 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4861 } 4862 4863 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4864 len = 0; 4865 for (i=0; i<m; i++) { 4866 bnzi = 0; 4867 /* add local non-zero cols of this proc's seqmat into lnk */ 4868 arow = owners[rank] + i; 4869 anzi = ai[arow+1] - ai[arow]; 4870 aj = a->j + ai[arow]; 4871 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4872 bnzi += nlnk; 4873 /* add received col data into lnk */ 4874 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4875 if (i == *nextrow[k]) { /* i-th row */ 4876 anzi = *(nextai[k]+1) - *nextai[k]; 4877 aj = buf_rj[k] + *nextai[k]; 4878 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4879 bnzi += nlnk; 4880 nextrow[k]++; nextai[k]++; 4881 } 4882 } 4883 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4884 4885 /* if free space is not available, make more free space */ 4886 if (current_space->local_remaining<bnzi) { 4887 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4888 nspacedouble++; 4889 } 4890 /* copy data into free space, then initialize lnk */ 4891 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4892 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4893 4894 current_space->array += bnzi; 4895 current_space->local_used += bnzi; 4896 current_space->local_remaining -= bnzi; 4897 4898 bi[i+1] = bi[i] + bnzi; 4899 } 4900 4901 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4902 4903 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4904 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4905 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4906 4907 /* create symbolic parallel matrix B_mpi */ 4908 /*---------------------------------------*/ 4909 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4910 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4911 if (n==PETSC_DECIDE) { 4912 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4913 } else { 4914 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4915 } 4916 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4917 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4918 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4919 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4920 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4921 4922 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4923 B_mpi->assembled = PETSC_FALSE; 4924 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4925 merge->bi = bi; 4926 merge->bj = bj; 4927 merge->buf_ri = buf_ri; 4928 merge->buf_rj = buf_rj; 4929 merge->coi = NULL; 4930 merge->coj = NULL; 4931 merge->owners_co = NULL; 4932 4933 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4934 4935 /* attach the supporting struct to B_mpi for reuse */ 4936 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4937 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4938 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4939 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4940 *mpimat = B_mpi; 4941 4942 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4943 PetscFunctionReturn(0); 4944 } 4945 4946 #undef __FUNCT__ 4947 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4948 /*@C 4949 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4950 matrices from each processor 4951 4952 Collective on MPI_Comm 4953 4954 Input Parameters: 4955 + comm - the communicators the parallel matrix will live on 4956 . seqmat - the input sequential matrices 4957 . m - number of local rows (or PETSC_DECIDE) 4958 . n - number of local columns (or PETSC_DECIDE) 4959 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4960 4961 Output Parameter: 4962 . mpimat - the parallel matrix generated 4963 4964 Level: advanced 4965 4966 Notes: 4967 The dimensions of the sequential matrix in each processor MUST be the same. 4968 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4969 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4970 @*/ 4971 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4972 { 4973 PetscErrorCode ierr; 4974 PetscMPIInt size; 4975 4976 PetscFunctionBegin; 4977 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4978 if (size == 1) { 4979 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4980 if (scall == MAT_INITIAL_MATRIX) { 4981 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4982 } else { 4983 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4984 } 4985 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4986 PetscFunctionReturn(0); 4987 } 4988 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4989 if (scall == MAT_INITIAL_MATRIX) { 4990 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4991 } 4992 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4993 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4994 PetscFunctionReturn(0); 4995 } 4996 4997 #undef __FUNCT__ 4998 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4999 /*@ 5000 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 5001 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5002 with MatGetSize() 5003 5004 Not Collective 5005 5006 Input Parameters: 5007 + A - the matrix 5008 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5009 5010 Output Parameter: 5011 . A_loc - the local sequential matrix generated 5012 5013 Level: developer 5014 5015 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5016 5017 @*/ 5018 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5019 { 5020 PetscErrorCode ierr; 5021 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5022 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5023 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5024 MatScalar *aa=a->a,*ba=b->a,*cam; 5025 PetscScalar *ca; 5026 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5027 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5028 PetscBool match; 5029 5030 PetscFunctionBegin; 5031 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5032 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5033 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5034 if (scall == MAT_INITIAL_MATRIX) { 5035 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5036 ci[0] = 0; 5037 for (i=0; i<am; i++) { 5038 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5039 } 5040 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5041 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5042 k = 0; 5043 for (i=0; i<am; i++) { 5044 ncols_o = bi[i+1] - bi[i]; 5045 ncols_d = ai[i+1] - ai[i]; 5046 /* off-diagonal portion of A */ 5047 for (jo=0; jo<ncols_o; jo++) { 5048 col = cmap[*bj]; 5049 if (col >= cstart) break; 5050 cj[k] = col; bj++; 5051 ca[k++] = *ba++; 5052 } 5053 /* diagonal portion of A */ 5054 for (j=0; j<ncols_d; j++) { 5055 cj[k] = cstart + *aj++; 5056 ca[k++] = *aa++; 5057 } 5058 /* off-diagonal portion of A */ 5059 for (j=jo; j<ncols_o; j++) { 5060 cj[k] = cmap[*bj++]; 5061 ca[k++] = *ba++; 5062 } 5063 } 5064 /* put together the new matrix */ 5065 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5066 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5067 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5068 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5069 mat->free_a = PETSC_TRUE; 5070 mat->free_ij = PETSC_TRUE; 5071 mat->nonew = 0; 5072 } else if (scall == MAT_REUSE_MATRIX) { 5073 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5074 ci = mat->i; cj = mat->j; cam = mat->a; 5075 for (i=0; i<am; i++) { 5076 /* off-diagonal portion of A */ 5077 ncols_o = bi[i+1] - bi[i]; 5078 for (jo=0; jo<ncols_o; jo++) { 5079 col = cmap[*bj]; 5080 if (col >= cstart) break; 5081 *cam++ = *ba++; bj++; 5082 } 5083 /* diagonal portion of A */ 5084 ncols_d = ai[i+1] - ai[i]; 5085 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5086 /* off-diagonal portion of A */ 5087 for (j=jo; j<ncols_o; j++) { 5088 *cam++ = *ba++; bj++; 5089 } 5090 } 5091 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5092 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5093 PetscFunctionReturn(0); 5094 } 5095 5096 #undef __FUNCT__ 5097 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5098 /*@C 5099 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5100 5101 Not Collective 5102 5103 Input Parameters: 5104 + A - the matrix 5105 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5106 - row, col - index sets of rows and columns to extract (or NULL) 5107 5108 Output Parameter: 5109 . A_loc - the local sequential matrix generated 5110 5111 Level: developer 5112 5113 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5114 5115 @*/ 5116 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5117 { 5118 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5119 PetscErrorCode ierr; 5120 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5121 IS isrowa,iscola; 5122 Mat *aloc; 5123 PetscBool match; 5124 5125 PetscFunctionBegin; 5126 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5127 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5128 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5129 if (!row) { 5130 start = A->rmap->rstart; end = A->rmap->rend; 5131 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5132 } else { 5133 isrowa = *row; 5134 } 5135 if (!col) { 5136 start = A->cmap->rstart; 5137 cmap = a->garray; 5138 nzA = a->A->cmap->n; 5139 nzB = a->B->cmap->n; 5140 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5141 ncols = 0; 5142 for (i=0; i<nzB; i++) { 5143 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5144 else break; 5145 } 5146 imark = i; 5147 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5148 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5149 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5150 } else { 5151 iscola = *col; 5152 } 5153 if (scall != MAT_INITIAL_MATRIX) { 5154 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5155 aloc[0] = *A_loc; 5156 } 5157 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5158 *A_loc = aloc[0]; 5159 ierr = PetscFree(aloc);CHKERRQ(ierr); 5160 if (!row) { 5161 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5162 } 5163 if (!col) { 5164 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5165 } 5166 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5167 PetscFunctionReturn(0); 5168 } 5169 5170 #undef __FUNCT__ 5171 #define __FUNCT__ "MatGetBrowsOfAcols" 5172 /*@C 5173 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5174 5175 Collective on Mat 5176 5177 Input Parameters: 5178 + A,B - the matrices in mpiaij format 5179 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5180 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5181 5182 Output Parameter: 5183 + rowb, colb - index sets of rows and columns of B to extract 5184 - B_seq - the sequential matrix generated 5185 5186 Level: developer 5187 5188 @*/ 5189 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5190 { 5191 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5192 PetscErrorCode ierr; 5193 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5194 IS isrowb,iscolb; 5195 Mat *bseq=NULL; 5196 5197 PetscFunctionBegin; 5198 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5199 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5200 } 5201 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5202 5203 if (scall == MAT_INITIAL_MATRIX) { 5204 start = A->cmap->rstart; 5205 cmap = a->garray; 5206 nzA = a->A->cmap->n; 5207 nzB = a->B->cmap->n; 5208 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5209 ncols = 0; 5210 for (i=0; i<nzB; i++) { /* row < local row index */ 5211 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5212 else break; 5213 } 5214 imark = i; 5215 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5216 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5217 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5218 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5219 } else { 5220 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5221 isrowb = *rowb; iscolb = *colb; 5222 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5223 bseq[0] = *B_seq; 5224 } 5225 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5226 *B_seq = bseq[0]; 5227 ierr = PetscFree(bseq);CHKERRQ(ierr); 5228 if (!rowb) { 5229 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5230 } else { 5231 *rowb = isrowb; 5232 } 5233 if (!colb) { 5234 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5235 } else { 5236 *colb = iscolb; 5237 } 5238 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5239 PetscFunctionReturn(0); 5240 } 5241 5242 #undef __FUNCT__ 5243 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5244 /* 5245 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5246 of the OFF-DIAGONAL portion of local A 5247 5248 Collective on Mat 5249 5250 Input Parameters: 5251 + A,B - the matrices in mpiaij format 5252 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5253 5254 Output Parameter: 5255 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5256 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5257 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5258 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5259 5260 Level: developer 5261 5262 */ 5263 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5264 { 5265 VecScatter_MPI_General *gen_to,*gen_from; 5266 PetscErrorCode ierr; 5267 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5268 Mat_SeqAIJ *b_oth; 5269 VecScatter ctx =a->Mvctx; 5270 MPI_Comm comm; 5271 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5272 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5273 PetscScalar *rvalues,*svalues; 5274 MatScalar *b_otha,*bufa,*bufA; 5275 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5276 MPI_Request *rwaits = NULL,*swaits = NULL; 5277 MPI_Status *sstatus,rstatus; 5278 PetscMPIInt jj; 5279 PetscInt *cols,sbs,rbs; 5280 PetscScalar *vals; 5281 5282 PetscFunctionBegin; 5283 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5284 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5285 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5286 } 5287 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5288 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5289 5290 gen_to = (VecScatter_MPI_General*)ctx->todata; 5291 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5292 rvalues = gen_from->values; /* holds the length of receiving row */ 5293 svalues = gen_to->values; /* holds the length of sending row */ 5294 nrecvs = gen_from->n; 5295 nsends = gen_to->n; 5296 5297 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5298 srow = gen_to->indices; /* local row index to be sent */ 5299 sstarts = gen_to->starts; 5300 sprocs = gen_to->procs; 5301 sstatus = gen_to->sstatus; 5302 sbs = gen_to->bs; 5303 rstarts = gen_from->starts; 5304 rprocs = gen_from->procs; 5305 rbs = gen_from->bs; 5306 5307 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5308 if (scall == MAT_INITIAL_MATRIX) { 5309 /* i-array */ 5310 /*---------*/ 5311 /* post receives */ 5312 for (i=0; i<nrecvs; i++) { 5313 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5314 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5315 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5316 } 5317 5318 /* pack the outgoing message */ 5319 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5320 5321 sstartsj[0] = 0; 5322 rstartsj[0] = 0; 5323 len = 0; /* total length of j or a array to be sent */ 5324 k = 0; 5325 for (i=0; i<nsends; i++) { 5326 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5327 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5328 for (j=0; j<nrows; j++) { 5329 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5330 for (l=0; l<sbs; l++) { 5331 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5332 5333 rowlen[j*sbs+l] = ncols; 5334 5335 len += ncols; 5336 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5337 } 5338 k++; 5339 } 5340 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5341 5342 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5343 } 5344 /* recvs and sends of i-array are completed */ 5345 i = nrecvs; 5346 while (i--) { 5347 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5348 } 5349 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5350 5351 /* allocate buffers for sending j and a arrays */ 5352 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5353 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5354 5355 /* create i-array of B_oth */ 5356 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5357 5358 b_othi[0] = 0; 5359 len = 0; /* total length of j or a array to be received */ 5360 k = 0; 5361 for (i=0; i<nrecvs; i++) { 5362 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5363 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5364 for (j=0; j<nrows; j++) { 5365 b_othi[k+1] = b_othi[k] + rowlen[j]; 5366 len += rowlen[j]; k++; 5367 } 5368 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5369 } 5370 5371 /* allocate space for j and a arrrays of B_oth */ 5372 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5373 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5374 5375 /* j-array */ 5376 /*---------*/ 5377 /* post receives of j-array */ 5378 for (i=0; i<nrecvs; i++) { 5379 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5380 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5381 } 5382 5383 /* pack the outgoing message j-array */ 5384 k = 0; 5385 for (i=0; i<nsends; i++) { 5386 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5387 bufJ = bufj+sstartsj[i]; 5388 for (j=0; j<nrows; j++) { 5389 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5390 for (ll=0; ll<sbs; ll++) { 5391 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5392 for (l=0; l<ncols; l++) { 5393 *bufJ++ = cols[l]; 5394 } 5395 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5396 } 5397 } 5398 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5399 } 5400 5401 /* recvs and sends of j-array are completed */ 5402 i = nrecvs; 5403 while (i--) { 5404 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5405 } 5406 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5407 } else if (scall == MAT_REUSE_MATRIX) { 5408 sstartsj = *startsj_s; 5409 rstartsj = *startsj_r; 5410 bufa = *bufa_ptr; 5411 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5412 b_otha = b_oth->a; 5413 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5414 5415 /* a-array */ 5416 /*---------*/ 5417 /* post receives of a-array */ 5418 for (i=0; i<nrecvs; i++) { 5419 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5420 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5421 } 5422 5423 /* pack the outgoing message a-array */ 5424 k = 0; 5425 for (i=0; i<nsends; i++) { 5426 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5427 bufA = bufa+sstartsj[i]; 5428 for (j=0; j<nrows; j++) { 5429 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5430 for (ll=0; ll<sbs; ll++) { 5431 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5432 for (l=0; l<ncols; l++) { 5433 *bufA++ = vals[l]; 5434 } 5435 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5436 } 5437 } 5438 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5439 } 5440 /* recvs and sends of a-array are completed */ 5441 i = nrecvs; 5442 while (i--) { 5443 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5444 } 5445 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5446 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5447 5448 if (scall == MAT_INITIAL_MATRIX) { 5449 /* put together the new matrix */ 5450 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5451 5452 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5453 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5454 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5455 b_oth->free_a = PETSC_TRUE; 5456 b_oth->free_ij = PETSC_TRUE; 5457 b_oth->nonew = 0; 5458 5459 ierr = PetscFree(bufj);CHKERRQ(ierr); 5460 if (!startsj_s || !bufa_ptr) { 5461 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5462 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5463 } else { 5464 *startsj_s = sstartsj; 5465 *startsj_r = rstartsj; 5466 *bufa_ptr = bufa; 5467 } 5468 } 5469 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5470 PetscFunctionReturn(0); 5471 } 5472 5473 #undef __FUNCT__ 5474 #define __FUNCT__ "MatGetCommunicationStructs" 5475 /*@C 5476 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5477 5478 Not Collective 5479 5480 Input Parameters: 5481 . A - The matrix in mpiaij format 5482 5483 Output Parameter: 5484 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5485 . colmap - A map from global column index to local index into lvec 5486 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5487 5488 Level: developer 5489 5490 @*/ 5491 #if defined(PETSC_USE_CTABLE) 5492 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5493 #else 5494 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5495 #endif 5496 { 5497 Mat_MPIAIJ *a; 5498 5499 PetscFunctionBegin; 5500 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5501 PetscValidPointer(lvec, 2); 5502 PetscValidPointer(colmap, 3); 5503 PetscValidPointer(multScatter, 4); 5504 a = (Mat_MPIAIJ*) A->data; 5505 if (lvec) *lvec = a->lvec; 5506 if (colmap) *colmap = a->colmap; 5507 if (multScatter) *multScatter = a->Mvctx; 5508 PetscFunctionReturn(0); 5509 } 5510 5511 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5512 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5513 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5514 5515 #undef __FUNCT__ 5516 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5517 /* 5518 Computes (B'*A')' since computing B*A directly is untenable 5519 5520 n p p 5521 ( ) ( ) ( ) 5522 m ( A ) * n ( B ) = m ( C ) 5523 ( ) ( ) ( ) 5524 5525 */ 5526 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5527 { 5528 PetscErrorCode ierr; 5529 Mat At,Bt,Ct; 5530 5531 PetscFunctionBegin; 5532 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5533 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5534 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5535 ierr = MatDestroy(&At);CHKERRQ(ierr); 5536 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5537 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5538 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5539 PetscFunctionReturn(0); 5540 } 5541 5542 #undef __FUNCT__ 5543 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5544 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5545 { 5546 PetscErrorCode ierr; 5547 PetscInt m=A->rmap->n,n=B->cmap->n; 5548 Mat Cmat; 5549 5550 PetscFunctionBegin; 5551 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5552 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5553 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5554 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5555 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5556 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5557 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5558 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5559 5560 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5561 5562 *C = Cmat; 5563 PetscFunctionReturn(0); 5564 } 5565 5566 /* ----------------------------------------------------------------*/ 5567 #undef __FUNCT__ 5568 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5569 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5570 { 5571 PetscErrorCode ierr; 5572 5573 PetscFunctionBegin; 5574 if (scall == MAT_INITIAL_MATRIX) { 5575 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5576 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5577 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5578 } 5579 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5580 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5581 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5582 PetscFunctionReturn(0); 5583 } 5584 5585 #if defined(PETSC_HAVE_MUMPS) 5586 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5587 #endif 5588 #if defined(PETSC_HAVE_PASTIX) 5589 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5590 #endif 5591 #if defined(PETSC_HAVE_SUPERLU_DIST) 5592 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5593 #endif 5594 #if defined(PETSC_HAVE_CLIQUE) 5595 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5596 #endif 5597 5598 /*MC 5599 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5600 5601 Options Database Keys: 5602 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5603 5604 Level: beginner 5605 5606 .seealso: MatCreateAIJ() 5607 M*/ 5608 5609 #undef __FUNCT__ 5610 #define __FUNCT__ "MatCreate_MPIAIJ" 5611 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5612 { 5613 Mat_MPIAIJ *b; 5614 PetscErrorCode ierr; 5615 PetscMPIInt size; 5616 5617 PetscFunctionBegin; 5618 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5619 5620 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5621 B->data = (void*)b; 5622 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5623 B->assembled = PETSC_FALSE; 5624 B->insertmode = NOT_SET_VALUES; 5625 b->size = size; 5626 5627 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5628 5629 /* build cache for off array entries formed */ 5630 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5631 5632 b->donotstash = PETSC_FALSE; 5633 b->colmap = 0; 5634 b->garray = 0; 5635 b->roworiented = PETSC_TRUE; 5636 5637 /* stuff used for matrix vector multiply */ 5638 b->lvec = NULL; 5639 b->Mvctx = NULL; 5640 5641 /* stuff for MatGetRow() */ 5642 b->rowindices = 0; 5643 b->rowvalues = 0; 5644 b->getrowactive = PETSC_FALSE; 5645 5646 /* flexible pointer used in CUSP/CUSPARSE classes */ 5647 b->spptr = NULL; 5648 5649 #if defined(PETSC_HAVE_MUMPS) 5650 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5651 #endif 5652 #if defined(PETSC_HAVE_PASTIX) 5653 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5654 #endif 5655 #if defined(PETSC_HAVE_SUPERLU_DIST) 5656 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5657 #endif 5658 #if defined(PETSC_HAVE_CLIQUE) 5659 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5660 #endif 5661 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5662 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5672 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5674 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5675 PetscFunctionReturn(0); 5676 } 5677 5678 #undef __FUNCT__ 5679 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5680 /*@ 5681 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5682 and "off-diagonal" part of the matrix in CSR format. 5683 5684 Collective on MPI_Comm 5685 5686 Input Parameters: 5687 + comm - MPI communicator 5688 . m - number of local rows (Cannot be PETSC_DECIDE) 5689 . n - This value should be the same as the local size used in creating the 5690 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5691 calculated if N is given) For square matrices n is almost always m. 5692 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5693 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5694 . i - row indices for "diagonal" portion of matrix 5695 . j - column indices 5696 . a - matrix values 5697 . oi - row indices for "off-diagonal" portion of matrix 5698 . oj - column indices 5699 - oa - matrix values 5700 5701 Output Parameter: 5702 . mat - the matrix 5703 5704 Level: advanced 5705 5706 Notes: 5707 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5708 must free the arrays once the matrix has been destroyed and not before. 5709 5710 The i and j indices are 0 based 5711 5712 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5713 5714 This sets local rows and cannot be used to set off-processor values. 5715 5716 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5717 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5718 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5719 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5720 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5721 communication if it is known that only local entries will be set. 5722 5723 .keywords: matrix, aij, compressed row, sparse, parallel 5724 5725 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5726 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5727 @*/ 5728 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5729 { 5730 PetscErrorCode ierr; 5731 Mat_MPIAIJ *maij; 5732 5733 PetscFunctionBegin; 5734 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5735 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5736 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5737 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5738 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5739 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5740 maij = (Mat_MPIAIJ*) (*mat)->data; 5741 5742 (*mat)->preallocated = PETSC_TRUE; 5743 5744 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5745 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5746 5747 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5748 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5749 5750 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5751 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5752 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5753 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5754 5755 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5756 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5757 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5758 PetscFunctionReturn(0); 5759 } 5760 5761 /* 5762 Special version for direct calls from Fortran 5763 */ 5764 #include <petsc-private/fortranimpl.h> 5765 5766 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5767 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5768 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5769 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5770 #endif 5771 5772 /* Change these macros so can be used in void function */ 5773 #undef CHKERRQ 5774 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5775 #undef SETERRQ2 5776 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5777 #undef SETERRQ3 5778 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5779 #undef SETERRQ 5780 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5781 5782 #undef __FUNCT__ 5783 #define __FUNCT__ "matsetvaluesmpiaij_" 5784 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5785 { 5786 Mat mat = *mmat; 5787 PetscInt m = *mm, n = *mn; 5788 InsertMode addv = *maddv; 5789 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5790 PetscScalar value; 5791 PetscErrorCode ierr; 5792 5793 MatCheckPreallocated(mat,1); 5794 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5795 5796 #if defined(PETSC_USE_DEBUG) 5797 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5798 #endif 5799 { 5800 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5801 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5802 PetscBool roworiented = aij->roworiented; 5803 5804 /* Some Variables required in the macro */ 5805 Mat A = aij->A; 5806 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5807 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5808 MatScalar *aa = a->a; 5809 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5810 Mat B = aij->B; 5811 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5812 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5813 MatScalar *ba = b->a; 5814 5815 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5816 PetscInt nonew = a->nonew; 5817 MatScalar *ap1,*ap2; 5818 5819 PetscFunctionBegin; 5820 for (i=0; i<m; i++) { 5821 if (im[i] < 0) continue; 5822 #if defined(PETSC_USE_DEBUG) 5823 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5824 #endif 5825 if (im[i] >= rstart && im[i] < rend) { 5826 row = im[i] - rstart; 5827 lastcol1 = -1; 5828 rp1 = aj + ai[row]; 5829 ap1 = aa + ai[row]; 5830 rmax1 = aimax[row]; 5831 nrow1 = ailen[row]; 5832 low1 = 0; 5833 high1 = nrow1; 5834 lastcol2 = -1; 5835 rp2 = bj + bi[row]; 5836 ap2 = ba + bi[row]; 5837 rmax2 = bimax[row]; 5838 nrow2 = bilen[row]; 5839 low2 = 0; 5840 high2 = nrow2; 5841 5842 for (j=0; j<n; j++) { 5843 if (roworiented) value = v[i*n+j]; 5844 else value = v[i+j*m]; 5845 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5846 if (in[j] >= cstart && in[j] < cend) { 5847 col = in[j] - cstart; 5848 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5849 } else if (in[j] < 0) continue; 5850 #if defined(PETSC_USE_DEBUG) 5851 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5852 #endif 5853 else { 5854 if (mat->was_assembled) { 5855 if (!aij->colmap) { 5856 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5857 } 5858 #if defined(PETSC_USE_CTABLE) 5859 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5860 col--; 5861 #else 5862 col = aij->colmap[in[j]] - 1; 5863 #endif 5864 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5865 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5866 col = in[j]; 5867 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5868 B = aij->B; 5869 b = (Mat_SeqAIJ*)B->data; 5870 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5871 rp2 = bj + bi[row]; 5872 ap2 = ba + bi[row]; 5873 rmax2 = bimax[row]; 5874 nrow2 = bilen[row]; 5875 low2 = 0; 5876 high2 = nrow2; 5877 bm = aij->B->rmap->n; 5878 ba = b->a; 5879 } 5880 } else col = in[j]; 5881 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5882 } 5883 } 5884 } else if (!aij->donotstash) { 5885 if (roworiented) { 5886 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5887 } else { 5888 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5889 } 5890 } 5891 } 5892 } 5893 PetscFunctionReturnVoid(); 5894 } 5895 5896