#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
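
/*
   Usage sketch (illustrative, not part of this file): the MATAIJ documentation above
   recommends calling both preallocation routines so the same code runs unchanged on one
   or many processes; the sizes and per-row nonzero counts here are made-up placeholders.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);         (used on one process)
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);  (used on several processes)
*/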
#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
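
/*
   Usage sketch (illustrative): MatGetColumnNorms_MPIAIJ() above is normally reached
   through the public MatGetColumnNorms(); the caller supplies one entry per global
   column.  The matrix A here is assumed to be an assembled MPIAIJ matrix.

     PetscInt  N;
     PetscReal *norms;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/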
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
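
/*
   Usage sketch (illustrative): a caller, typically a preconditioner, that has built a
   square SeqAIJ matrix gmat on process 0 can spread it over a communicator with the
   routine above; mlocal is the number of rows this process is to own (chosen by the
   caller, summing to the global size across the communicator).

     Mat parallel_mat;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_INITIAL_MATRIX,&parallel_mat);CHKERRQ(ierr);
*/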
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0; \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}
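
/*
   The macro above (and its twin below) inline the same search-and-insert step, once for
   the diagonal (A) and once for the off-diagonal (B) block.  An equivalent standalone
   sketch of the logic, with hypothetical names and without the reallocation path:

     static void InsertOrAddEntry(PetscInt *cols,MatScalar *vals,PetscInt *nrow,PetscInt col,MatScalar value,InsertMode addv)
     {
       PetscInt lo = 0,hi = *nrow,t,i,k;
       while (hi-lo > 5) {                          (binary search while the range is large)
         t = (lo+hi)/2;
         if (cols[t] > col) hi = t; else lo = t;
       }
       for (i=lo; i<hi; i++) {                      (finish with a linear scan)
         if (cols[i] > col) break;
         if (cols[i] == col) {                      (already present: add or overwrite)
           if (addv == ADD_VALUES) vals[i] += value; else vals[i] = value;
           return;
         }
       }
       for (k=(*nrow)++; k>i; k--) {                (shift later entries up, then insert)
         cols[k] = cols[k-1]; vals[k] = vals[k-1];
       }
       cols[i] = col; vals[i] = value;
     }
*/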
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2)  low2 = 0; \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
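
/*
   Usage sketch (illustrative): MatSetValues_MPIAIJ() below stores locally owned entries
   directly and stashes off-process entries until assembly, so a typical caller always
   brackets insertion with the assembly pair:

     PetscInt    row = 0, col = 0;        (any global indices, owned or not)
     PetscScalar v   = 1.0;
     ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/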
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A     = aij->A;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa   = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B     = aij->B;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba   = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
        } else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
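
/*
   Usage sketch (illustrative): MatGetValues_MPIAIJ() above supports only locally owned
   rows, so a caller restricts its queries to the ownership range:

     PetscInt    rstart,rend,row,col = 0;
     PetscScalar val;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
     }
*/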
"MatAssemblyBegin_MPIAIJ" 636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt nstash,reallocs; 641 InsertMode addv; 642 643 PetscFunctionBegin; 644 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 645 646 /* make sure all processors are either in INSERTMODE or ADDMODE */ 647 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 648 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 649 mat->insertmode = addv; /* in case this processor had no cache */ 650 651 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 652 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 653 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 660 { 661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 663 PetscErrorCode ierr; 664 PetscMPIInt n; 665 PetscInt i,j,rstart,ncols,flg; 666 PetscInt *row,*col; 667 PetscBool other_disassembled; 668 PetscScalar *val; 669 InsertMode addv = mat->insertmode; 670 671 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 672 673 PetscFunctionBegin; 674 if (!aij->donotstash && !mat->nooffprocentries) { 675 while (1) { 676 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 677 if (!flg) break; 678 679 for (i=0; i<n; ) { 680 /* Now identify the consecutive vals belonging to the same row */ 681 for (j=i,rstart=row[j]; j<n; j++) { 682 if (row[j] != rstart) break; 683 } 684 if (j < n) ncols = j-i; 685 else ncols = n-i; 686 /* Now assemble all these values with a single function call */ 687 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 688 689 i = j; 690 } 691 } 692 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 693 } 694 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 695 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 696 697 /* determine if any processor has disassembled, if so we must 698 also disassemble ourselfs, in order that we may reassemble. 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
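
/*
   Usage sketch (illustrative): MatZeroRows() (implemented for this type below) is the
   usual way to impose Dirichlet conditions; with a nonzero diag and the optional
   vectors it also fixes the right-hand side so that b[row] = diag*x[row] for each
   zeroed row.  The row list here is a made-up placeholder.

     PetscInt rows[] = {0,1};
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/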
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory, thus calling lots of mallocs and slowing
     things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
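
/*
   Usage sketch (illustrative): MatZeroRowsColumns() (implemented below) additionally
   zeroes the matching columns, preserving symmetry; the known values in x are folded
   into b before the columns are removed.  The row list is a made-up placeholder.

     PetscInt bcrows[] = {0,1};
     ierr = MatZeroRowsColumns(A,2,bcrows,1.0,x,b);CHKERRQ(ierr);
*/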
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
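
/*
   The multiply kernels above all follow the same latency-hiding pattern: start the
   scatter of the needed ghost values, apply the local (diagonal) block while the
   messages are in flight, finish the scatter, then fold in the off-diagonal block.
   Schematically (this repeats the body of MatMult_MPIAIJ above):

     ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);     (overlaps the communication)
     ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
*/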
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
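
/*
   Usage sketch (illustrative): MatIsTranspose_MPIAIJ() above is reached through the
   public interface; tol is the tolerance for comparing matching entries.

     PetscBool flg;
     ierr = MatIsTranspose(A,B,0.0,&flg);CHKERRQ(ierr);
     if (flg) { ierr = PetscPrintf(PETSC_COMM_WORLD,"B is the transpose of A\n");CHKERRQ(ierr); }
*/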
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
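
/*
   Usage sketch (illustrative): MatGetDiagonal_MPIAIJ() above requires matching row and
   column layouts; a vector with a compatible (row) layout comes from MatGetVecs().

     Vec diag;
     ierr = MatGetVecs(A,NULL,&diag);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,diag);CHKERRQ(ierr);
     ierr = VecDestroy(&diag);CHKERRQ(ierr);
*/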
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  PetscFunctionReturn(0);
}
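
/*
   Usage sketch (illustrative): MatView_MPIAIJ_Binary() above is reached by viewing the
   matrix with a binary viewer; the file name here is a made-up placeholder.

     PetscViewer v;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&v);CHKERRQ(ierr);
     ierr = MatView(A,v);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&v);CHKERRQ(ierr);
*/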
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1457 if (iascii) { 1458 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1459 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1460 MatInfo info; 1461 PetscInt *inodes = NULL; 1462 1463 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1464 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1465 ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1467 if (!inodes) { 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1469 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1472 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1473 } 1474 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1475 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1476 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1478 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1481 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1484 PetscInt inodecount,inodelimit,*inodes; 1485 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1486 if (inodes) { 1487 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1488 } else { 1489 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1490 } 1491 PetscFunctionReturn(0); 1492 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1493 PetscFunctionReturn(0); 1494 } 1495 } else if (isbinary) { 1496 if (size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 } else { 1500 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } else if (isdraw) { 1504 PetscDraw draw; 1505 PetscBool isnull; 1506 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1507 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1508 } 1509 1510 if (size == 1) { 1511 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1512 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1513 } else { 1514 /* assemble the entire matrix onto first processor.
*/ 1515 Mat A; 1516 Mat_SeqAIJ *Aloc; 1517 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1518 MatScalar *a; 1519 1520 if (mat->rmap->N > 1024) { 1521 PetscBool flg = PETSC_FALSE; 1522 1523 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1524 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1525 } 1526 1527 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1528 if (!rank) { 1529 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1530 } else { 1531 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1532 } 1533 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1534 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1535 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1536 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1537 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1538 1539 /* copy over the A part */ 1540 Aloc = (Mat_SeqAIJ*)aij->A->data; 1541 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 row = mat->rmap->rstart; 1543 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1544 for (i=0; i<m; i++) { 1545 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1546 row++; 1547 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1548 } 1549 aj = Aloc->j; 1550 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1551 1552 /* copy over the B part */ 1553 Aloc = (Mat_SeqAIJ*)aij->B->data; 1554 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1555 row = mat->rmap->rstart; 1556 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1557 ct = cols; 1558 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1559 for (i=0; i<m; i++) { 1560 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1561 row++; 1562 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1563 } 1564 ierr = PetscFree(ct);CHKERRQ(ierr); 1565 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 /* 1568 Everyone has to call to draw the matrix since the graphics waits are 1569 synchronized across all processors that share the PetscDraw object 1570 */ 1571 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1572 if (!rank) { 1573 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1574 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1575 ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr); 1576 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1577 } 1578 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1579 ierr = MatDestroy(&A);CHKERRQ(ierr); 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatView_MPIAIJ" 1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1587 { 1588 PetscErrorCode ierr; 1589 PetscBool iascii,isdraw,issocket,isbinary; 1590 1591 PetscFunctionBegin; 1592 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1593 ierr =
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1596 if (iascii || isdraw || isbinary || issocket) { 1597 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatSOR_MPIAIJ" 1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1605 { 1606 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1607 PetscErrorCode ierr; 1608 Vec bb1 = 0; 1609 PetscBool hasop; 1610 1611 PetscFunctionBegin; 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) { 1618 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1619 } 1620 1621 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1622 if (flag & SOR_ZERO_INITIAL_GUESS) { 1623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1624 its--; 1625 } 1626 1627 while (its--) { 1628 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 1631 /* update rhs: bb1 = bb - B*x */ 1632 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1633 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1634 1635 /* local sweep */ 1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1637 } 1638 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1639 if (flag & SOR_ZERO_INITIAL_GUESS) { 1640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1641 its--; 1642 } 1643 while (its--) { 1644 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 1647 /* update rhs: bb1 = bb - B*x */ 1648 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1649 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1650 1651 /* local sweep */ 1652 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1653 } 1654 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1655 if (flag & SOR_ZERO_INITIAL_GUESS) { 1656 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1657 its--; 1658 } 1659 while (its--) { 1660 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 1663 /* update rhs: bb1 = bb - B*x */ 1664 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1665 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1666 1667 /* local sweep */ 1668 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1669 } 1670 } else if (flag & SOR_EISENSTAT) { 1671 Vec xx1; 1672 1673 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1675
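/* Eisenstat's trick: the local backward sweep above produced xx; now gather its ghost values so the off-diagonal block B can contribute to the modified right-hand side bb1 before the forward sweep */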
1676 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 if (!mat->diag) { 1679 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1680 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1681 } 1682 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1683 if (hasop) { 1684 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1685 } else { 1686 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1687 } 1688 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1689 1690 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1691 1692 /* local sweep */ 1693 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1694 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1695 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1696 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1697 1698 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatPermute_MPIAIJ" 1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1705 { 1706 Mat aA,aB,Aperm; 1707 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1708 PetscScalar *aa,*ba; 1709 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1710 PetscSF rowsf,sf; 1711 IS parcolp = NULL; 1712 PetscBool done; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1717 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1718 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1719 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1720 1721 /* Invert row permutation to find out where my rows should go */ 1722 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1723 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1724 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1725 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1726 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1727 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1728 1729 /* Invert column permutation to find out where my columns should go */ 1730 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1731 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1732 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1733 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1734 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1737 1738 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1739 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1740 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1741 1742 /* Find out where my gcols should go */ 1743 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1744 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1745 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1746 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1747 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1748 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1751 1752 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1753 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1757 for (i=0; i<m; i++) { 1758 PetscInt row = rdest[i],rowner; 1759 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1760 for (j=ai[i]; j<ai[i+1]; j++) { 1761 PetscInt cowner,col = cdest[aj[j]]; 1762 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1763 if (rowner == cowner) dnnz[i]++; 1764 else onnz[i]++; 1765 } 1766 for (j=bi[i]; j<bi[i+1]; j++) { 1767 PetscInt cowner,col = gcdest[bj[j]]; 1768 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1769 if (rowner == cowner) dnnz[i]++; 1770 else onnz[i]++; 1771 } 1772 } 1773 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1780 1781 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1784 for (i=0; i<m; i++) { 1785 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1786 PetscInt rowlen; 1787 rowlen = ai[i+1] - ai[i]; 1788 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1789 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1790 rowlen = bi[i+1] - bi[i]; 1791 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1792 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1793 } 1794 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1800 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1801 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1802 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1803 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1804 *B = Aperm; 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1811 { 1812 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1813 Mat A = mat->A,B = mat->B; 1814 PetscErrorCode ierr; 1815 PetscReal isend[5],irecv[5]; 1816 1817 PetscFunctionBegin; 1818 
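/* gather local statistics from the diagonal (A) and off-diagonal (B) blocks, then combine them across ranks according to flag (MAT_LOCAL, MAT_GLOBAL_MAX, or MAT_GLOBAL_SUM) */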
info->block_size = 1.0; 1819 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1820 1821 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1822 isend[3] = info->memory; isend[4] = info->mallocs; 1823 1824 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1825 1826 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1827 isend[3] += info->memory; isend[4] += info->mallocs; 1828 if (flag == MAT_LOCAL) { 1829 info->nz_used = isend[0]; 1830 info->nz_allocated = isend[1]; 1831 info->nz_unneeded = isend[2]; 1832 info->memory = isend[3]; 1833 info->mallocs = isend[4]; 1834 } else if (flag == MAT_GLOBAL_MAX) { 1835 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1836 1837 info->nz_used = irecv[0]; 1838 info->nz_allocated = irecv[1]; 1839 info->nz_unneeded = irecv[2]; 1840 info->memory = irecv[3]; 1841 info->mallocs = irecv[4]; 1842 } else if (flag == MAT_GLOBAL_SUM) { 1843 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1844 1845 info->nz_used = irecv[0]; 1846 info->nz_allocated = irecv[1]; 1847 info->nz_unneeded = irecv[2]; 1848 info->memory = irecv[3]; 1849 info->mallocs = irecv[4]; 1850 } 1851 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1852 info->fill_ratio_needed = 0; 1853 info->factor_mallocs = 0; 1854 PetscFunctionReturn(0); 1855 } 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatSetOption_MPIAIJ" 1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1860 { 1861 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 switch (op) { 1866 case MAT_NEW_NONZERO_LOCATIONS: 1867 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1868 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1869 case MAT_KEEP_NONZERO_PATTERN: 1870 case MAT_NEW_NONZERO_LOCATION_ERR: 1871 case MAT_USE_INODES: 1872 case MAT_IGNORE_ZERO_ENTRIES: 1873 MatCheckPreallocated(A,1); 1874 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1875 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1876 break; 1877 case MAT_ROW_ORIENTED: 1878 a->roworiented = flg; 1879 1880 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1881 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1882 break; 1883 case MAT_NEW_DIAGONALS: 1884 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1885 break; 1886 case MAT_IGNORE_OFF_PROC_ENTRIES: 1887 a->donotstash = flg; 1888 break; 1889 case MAT_SPD: 1890 A->spd_set = PETSC_TRUE; 1891 A->spd = flg; 1892 if (flg) { 1893 A->symmetric = PETSC_TRUE; 1894 A->structurally_symmetric = PETSC_TRUE; 1895 A->symmetric_set = PETSC_TRUE; 1896 A->structurally_symmetric_set = PETSC_TRUE; 1897 } 1898 break; 1899 case MAT_SYMMETRIC: 1900 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1901 break; 1902 case MAT_STRUCTURALLY_SYMMETRIC: 1903 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1904 break; 1905 case MAT_HERMITIAN: 1906 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1907 break; 1908 case MAT_SYMMETRY_ETERNAL: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 default: 1912 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatGetRow_MPIAIJ" 1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1920 { 1921 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1922 PetscScalar 
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1923 PetscErrorCode ierr; 1924 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1925 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1926 PetscInt *cmap,*idx_p; 1927 1928 PetscFunctionBegin; 1929 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1930 mat->getrowactive = PETSC_TRUE; 1931 1932 if (!mat->rowvalues && (idx || v)) { 1933 /* 1934 allocate enough space to hold information from the longest row. 1935 */ 1936 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1937 PetscInt max = 1,tmp; 1938 for (i=0; i<matin->rmap->n; i++) { 1939 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1940 if (max < tmp) max = tmp; 1941 } 1942 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1943 } 1944 1945 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1946 lrow = row - rstart; 1947 1948 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1949 if (!v) {pvA = 0; pvB = 0;} 1950 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1951 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1952 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1953 nztot = nzA + nzB; 1954 1955 cmap = mat->garray; 1956 if (v || idx) { 1957 if (nztot) { 1958 /* Sort by increasing column numbers, assuming A and B already sorted */ 1959 PetscInt imark = -1; 1960 if (v) { 1961 *v = v_p = mat->rowvalues; 1962 for (i=0; i<nzB; i++) { 1963 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1964 else break; 1965 } 1966 imark = i; 1967 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1968 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1969 } 1970 if (idx) { 1971 *idx = idx_p = mat->rowindices; 1972 if (imark > -1) { 1973 for (i=0; i<imark; i++) { 1974 idx_p[i] = cmap[cworkB[i]]; 1975 } 1976 } else { 1977 for (i=0; i<nzB; i++) { 1978 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1979 else break; 1980 } 1981 imark = i; 1982 } 1983 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1984 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1985 } 1986 } else { 1987 if (idx) *idx = 0; 1988 if (v) *v = 0; 1989 } 1990 } 1991 *nz = nztot; 1992 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1993 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2000 { 2001 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2002 2003 PetscFunctionBegin; 2004 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2005 aij->getrowactive = PETSC_FALSE; 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatNorm_MPIAIJ" 2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2012 { 2013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2014 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2015 PetscErrorCode ierr; 2016 PetscInt i,j,cstart = mat->cmap->rstart; 2017 PetscReal sum = 0.0; 2018 MatScalar *v; 2019 2020 PetscFunctionBegin; 2021 if (aij->size == 1) { 2022 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2023 } else { 2024 if (type == NORM_FROBENIUS) { 2025 v = 
amat->a; 2026 for (i=0; i<amat->nz; i++) { 2027 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2028 } 2029 v = bmat->a; 2030 for (i=0; i<bmat->nz; i++) { 2031 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2032 } 2033 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2034 *norm = PetscSqrtReal(*norm); 2035 } else if (type == NORM_1) { /* max column norm */ 2036 PetscReal *tmp,*tmp2; 2037 PetscInt *jj,*garray = aij->garray; 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2040 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2041 *norm = 0.0; 2042 v = amat->a; jj = amat->j; 2043 for (j=0; j<amat->nz; j++) { 2044 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2045 } 2046 v = bmat->a; jj = bmat->j; 2047 for (j=0; j<bmat->nz; j++) { 2048 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2049 } 2050 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2051 for (j=0; j<mat->cmap->N; j++) { 2052 if (tmp2[j] > *norm) *norm = tmp2[j]; 2053 } 2054 ierr = PetscFree(tmp);CHKERRQ(ierr); 2055 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2056 } else if (type == NORM_INFINITY) { /* max row norm */ 2057 PetscReal ntemp = 0.0; 2058 for (j=0; j<aij->A->rmap->n; j++) { 2059 v = amat->a + amat->i[j]; 2060 sum = 0.0; 2061 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2062 sum += PetscAbsScalar(*v); v++; 2063 } 2064 v = bmat->a + bmat->i[j]; 2065 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2066 sum += PetscAbsScalar(*v); v++; 2067 } 2068 if (sum > ntemp) ntemp = sum; 2069 } 2070 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2071 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2072 } 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatTranspose_MPIAIJ" 2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2079 { 2080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2081 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2082 PetscErrorCode ierr; 2083 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2084 PetscInt cstart = A->cmap->rstart,ncol; 2085 Mat B; 2086 MatScalar *array; 2087 2088 PetscFunctionBegin; 2089 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2090 2091 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2092 ai = Aloc->i; aj = Aloc->j; 2093 bi = Bloc->i; bj = Bloc->j; 2094 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2095 PetscInt *d_nnz,*g_nnz,*o_nnz; 2096 PetscSFNode *oloc; 2097 PETSC_UNUSED PetscSF sf; 2098 2099 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2100 /* compute d_nnz for preallocation */ 2101 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2102 for (i=0; i<ai[ma]; i++) { 2103 d_nnz[aj[i]]++; 2104 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2105 } 2106 /* compute local off-diagonal contributions */ 2107 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2108 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2109 /* map those to global */ 2110 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2111 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2112 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2113 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2114 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2117 2118 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2119 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2120 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2121 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2122 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2123 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2124 } else { 2125 B = *matout; 2126 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2127 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2128 } 2129 2130 /* copy over the A part */ 2131 array = Aloc->a; 2132 row = A->rmap->rstart; 2133 for (i=0; i<ma; i++) { 2134 ncol = ai[i+1]-ai[i]; 2135 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2136 row++; 2137 array += ncol; aj += ncol; 2138 } 2139 aj = Aloc->j; 2140 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */ 2141 2142 /* copy over the B part */ 2143 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2144 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2145 array = Bloc->a; 2146 row = A->rmap->rstart; 2147 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2148 cols_tmp = cols; 2149 for (i=0; i<mb; i++) { 2150 ncol = bi[i+1]-bi[i]; 2151 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2152 row++; 2153 array += ncol; cols_tmp += ncol; 2154 } 2155 ierr = PetscFree(cols);CHKERRQ(ierr); 2156 2157 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2160 *matout = B; 2161 } else { 2162 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2170 { 2171 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2172 Mat a = aij->A,b = aij->B; 2173 PetscErrorCode ierr; 2174 PetscInt s1,s2,s3; 2175 2176 PetscFunctionBegin; 2177 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2178 if (rr) { 2179 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2180 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2181 /* Overlap communication with computation.
*/ 2182 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2183 } 2184 if (ll) { 2185 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2186 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2187 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2188 } 2189 /* scale the diagonal block */ 2190 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2191 2192 if (rr) { 2193 /* Do a scatter end and then right scale the off-diagonal block */ 2194 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2196 } 2197 PetscFunctionReturn(0); 2198 } 2199 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2203 { 2204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatEqual_MPIAIJ" 2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2215 { 2216 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2217 Mat a,b,c,d; 2218 PetscBool flg; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 a = matA->A; b = matA->B; 2223 c = matB->A; d = matB->B; 2224 2225 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2226 if (flg) { 2227 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2228 } 2229 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatCopy_MPIAIJ" 2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2236 { 2237 PetscErrorCode ierr; 2238 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2239 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2240 2241 PetscFunctionBegin; 2242 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2243 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2244 /* because of the column compression in the off-processor part of the matrix a->B, 2245 the number of columns in a->B and b->B may be different, hence we cannot call 2246 the MatCopy() directly on the two parts. 
If need be, we could provide a copy 2247 more efficient than MatCopy_Basic() by first uncompressing the a->B matrices and 2248 then copying the submatrices */ 2249 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2250 } else { 2251 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2252 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2253 } 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatSetUp_MPIAIJ" 2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2260 { 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2272 { 2273 PetscInt i,m=Y->rmap->N; 2274 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2275 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2276 const PetscInt *xi = x->i,*yi = y->i; 2277 2278 PetscFunctionBegin; 2279 /* Set the number of nonzeros in the new matrix */ 2280 for (i=0; i<m; i++) { 2281 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2282 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2283 nnz[i] = 0; 2284 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2285 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2286 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2287 nnz[i]++; 2288 } 2289 for (; k<nzy; k++) nnz[i]++; 2290 } 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatAXPY_MPIAIJ" 2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2297 { 2298 PetscErrorCode ierr; 2299 PetscInt i; 2300 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2301 PetscBLASInt bnz,one=1; 2302 Mat_SeqAIJ *x,*y; 2303 2304 PetscFunctionBegin; 2305 if (str == SAME_NONZERO_PATTERN) { 2306 PetscScalar alpha = a; 2307 x = (Mat_SeqAIJ*)xx->A->data; 2308 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2309 y = (Mat_SeqAIJ*)yy->A->data; 2310 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2311 x = (Mat_SeqAIJ*)xx->B->data; 2312 y = (Mat_SeqAIJ*)yy->B->data; 2313 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2314 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2315 } else if (str == SUBSET_NONZERO_PATTERN) { 2316 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2317 2318 x = (Mat_SeqAIJ*)xx->B->data; 2319 y = (Mat_SeqAIJ*)yy->B->data; 2320 if (y->xtoy && y->XtoY != xx->B) { 2321 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2322 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2323 } 2324 if (!y->xtoy) { /* get xtoy */ 2325 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2326 y->XtoY = xx->B; 2327 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2328 } 2329 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2330 } else { 2331 Mat B; 2332 PetscInt *nnz_d,*nnz_o; 2333 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2334 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2335 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2336 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2337 ierr =
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2338 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2339 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2341 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2342 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2343 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2344 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2347 } 2348 PetscFunctionReturn(0); 2349 } 2350 2351 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatConjugate_MPIAIJ" 2355 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2356 { 2357 #if defined(PETSC_USE_COMPLEX) 2358 PetscErrorCode ierr; 2359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2360 2361 PetscFunctionBegin; 2362 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2363 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2364 #else 2365 PetscFunctionBegin; 2366 #endif 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatRealPart_MPIAIJ" 2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2373 { 2374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2375 PetscErrorCode ierr; 2376 2377 PetscFunctionBegin; 2378 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2379 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2386 { 2387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2392 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #if defined(PETSC_HAVE_PBGL) 2397 2398 #include <boost/parallel/mpi/bsp_process_group.hpp> 2399 #include <boost/graph/distributed/ilu_default_graph.hpp> 2400 #include <boost/graph/distributed/ilu_0_block.hpp> 2401 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2402 #include <boost/graph/distributed/petsc/interface.hpp> 2403 #include <boost/multi_array.hpp> 2404 #include <boost/parallel/distributed_property_map.hpp> 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2408 /* 2409 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2410 */ 2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2412 { 2413 namespace petsc = boost::distributed::petsc; 2414 2415 namespace graph_dist = boost::graph::distributed; 2416 using boost::graph::distributed::ilu_default::process_group_type; 2417 using boost::graph::ilu_permuted; 2418 2419 PetscBool row_identity, col_identity; 2420 PetscContainer c; 2421 PetscInt m, n, M, N; 2422 PetscErrorCode ierr; 2423 2424 PetscFunctionBegin; 2425 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2426 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2427 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2428 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2429 2430 process_group_type pg; 2431 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2432 lgraph_type
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2433 lgraph_type& level_graph = *lgraph_p; 2434 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2435 2436 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2437 ilu_permuted(level_graph); 2438 2439 /* put together the new matrix */ 2440 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2441 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2442 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2443 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2444 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2445 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2446 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 2449 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr); 2450 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 2451 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 2452 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2459 { 2460 PetscFunctionBegin; 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatSolve_MPIAIJ" 2466 /* 2467 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2468 */ 2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2470 { 2471 namespace graph_dist = boost::graph::distributed; 2472 2473 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2474 lgraph_type *lgraph_p; 2475 PetscContainer c; 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2480 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2481 ierr = VecCopy(b, x);CHKERRQ(ierr); 2482 2483 PetscScalar *array_x; 2484 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2485 PetscInt sx; 2486 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2487 2488 PetscScalar *array_b; 2489 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2490 PetscInt sb; 2491 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2492 2493 lgraph_type& level_graph = *lgraph_p; 2494 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2495 2496 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2497 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2498 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2499 2500 typedef boost::iterator_property_map<array_ref_type::iterator, 2501 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2502 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2503 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2504 2505 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2506 PetscFunctionReturn(0); 2507 } 2508 #endif 2509 2510 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2511 PetscInt nzlocal,nsends,nrecvs; 2512 PetscMPIInt *send_rank,*recv_rank; 2513 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2514 PetscScalar *sbuf_a,**rbuf_a; 2515 PetscSubcomm *psubcomm; 2516 PetscErrorCode (*Destroy)(Mat); 2517 } Mat_Redundant; 2518 2519 #undef __FUNCT__ 2520 #define __FUNCT__
"PetscContainerDestroy_MatRedundant" 2521 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2522 { 2523 PetscErrorCode ierr; 2524 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2525 PetscInt i; 2526 2527 PetscFunctionBegin; 2528 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2529 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2530 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2531 for (i=0; i<redund->nrecvs; i++) { 2532 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2533 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2534 } 2535 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2536 ierr = PetscFree(redund);CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "MatDestroy_MatRedundant" 2542 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2543 { 2544 PetscErrorCode ierr; 2545 PetscContainer container; 2546 Mat_Redundant *redund=NULL; 2547 2548 PetscFunctionBegin; 2549 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2550 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2551 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2552 2553 A->ops->destroy = redund->Destroy; 2554 2555 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2556 if (A->ops->destroy) { 2557 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2558 } 2559 PetscFunctionReturn(0); 2560 } 2561 2562 #undef __FUNCT__ 2563 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2564 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant) 2565 { 2566 PetscMPIInt rank,size; 2567 MPI_Comm comm,subcomm=psubcomm->comm; 2568 PetscErrorCode ierr; 2569 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2570 PetscMPIInt *send_rank= NULL,*recv_rank=NULL; 2571 PetscInt *rowrange = mat->rmap->range; 2572 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2573 Mat A = aij->A,B=aij->B,C=*matredundant; 2574 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2575 PetscScalar *sbuf_a; 2576 PetscInt nzlocal=a->nz+b->nz; 2577 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2578 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2579 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2580 MatScalar *aworkA,*aworkB; 2581 PetscScalar *vals; 2582 PetscMPIInt tag1,tag2,tag3,imdex; 2583 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2584 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2585 MPI_Status recv_status,*send_status; 2586 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2587 PetscInt **rbuf_j=NULL; 2588 PetscScalar **rbuf_a=NULL; 2589 Mat_Redundant *redund =NULL; 2590 PetscContainer container; 2591 PetscInt rstart_sub,rend_sub,mloc_sub; 2592 2593 PetscFunctionBegin; 2594 if (psubcomm->type != PETSC_SUBCOMM_INTERLACED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMETYPE,"Input psubcomm must be PETSC_SUBCOMM_INTERLACED type"); 2595 ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr); 2596 if (M != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-square matrix yet, rows %D columns %D",M,N); 2597 2598 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2599 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2600 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2601 2602 /* printf("[%d] 
MatGetRedundantMatrix_MPIAIJ_interlaced...\n",rank); */ 2603 2604 if (reuse == MAT_REUSE_MATRIX) { 2605 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2606 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2607 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2608 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2609 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2610 2611 nsends = redund->nsends; 2612 nrecvs = redund->nrecvs; 2613 send_rank = redund->send_rank; 2614 recv_rank = redund->recv_rank; 2615 sbuf_nz = redund->sbuf_nz; 2616 rbuf_nz = redund->rbuf_nz; 2617 sbuf_j = redund->sbuf_j; 2618 sbuf_a = redund->sbuf_a; 2619 rbuf_j = redund->rbuf_j; 2620 rbuf_a = redund->rbuf_a; 2621 } 2622 2623 if (reuse == MAT_INITIAL_MATRIX) { 2624 PetscMPIInt subrank,subsize; 2625 PetscInt nleftover,np_subcomm; 2626 2627 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2628 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2629 2630 /* get local size of redundant matrix */ 2631 const PetscInt *range; 2632 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2633 rstart_sub = range[nsubcomm*subrank]; 2634 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2635 rend_sub = range[nsubcomm*(subrank+1)]; 2636 } else { 2637 rend_sub = mat->rmap->N; 2638 } 2639 2640 mloc_sub = rend_sub - rstart_sub; 2641 /* printf("[%d] rstart_sub %d %d, mloc_sub %d\n",rank,rstart_sub,rend_sub, mloc_sub); */ 2642 2643 /* get the destination processors' id send_rank, nsends and nrecvs */ 2644 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2645 2646 np_subcomm = size/nsubcomm; 2647 nleftover = size - nsubcomm*np_subcomm; 2648 2649 nsends = 0; nrecvs = 0; 2650 for (i=0; i<size; i++) { 2651 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2652 send_rank[nsends] = i; nsends++; 2653 recv_rank[nrecvs++] = i; 2654 } 2655 } 2656 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2657 i = size-nleftover-1; 2658 j = 0; 2659 while (j < nsubcomm - nleftover) { 2660 send_rank[nsends++] = i; 2661 i--; j++; 2662 } 2663 } 2664 2665 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2666 for (i=0; i<nleftover; i++) { 2667 recv_rank[nrecvs++] = size-nleftover+i; 2668 } 2669 } 2670 2671 /* allocate sbuf_j, sbuf_a */ 2672 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2673 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2674 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2675 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2676 2677 /* copy mat's local entries into the buffers */ 2678 if (reuse == MAT_INITIAL_MATRIX) { 2679 rownz_max = 0; 2680 rptr = sbuf_j; 2681 cols = sbuf_j + rend-rstart + 1; 2682 vals = sbuf_a; 2683 rptr[0] = 0; 2684 for (i=0; i<rend-rstart; i++) { 2685 row = i + rstart; 2686 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2687 ncols = nzA + nzB; 2688 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2689 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2690 /* load the column indices for this row into cols */ 2691 lwrite = 0; 2692 for (l=0; l<nzB; l++) { 2693 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2694 vals[lwrite] = aworkB[l]; 2695
cols[lwrite++] = ctmp; 2696 } 2697 } 2698 for (l=0; l<nzA; l++) { 2699 vals[lwrite] = aworkA[l]; 2700 cols[lwrite++] = cstart + cworkA[l]; 2701 } 2702 for (l=0; l<nzB; l++) { 2703 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2704 vals[lwrite] = aworkB[l]; 2705 cols[lwrite++] = ctmp; 2706 } 2707 } 2708 vals += ncols; 2709 cols += ncols; 2710 rptr[i+1] = rptr[i] + ncols; 2711 if (rownz_max < ncols) rownz_max = ncols; 2712 } 2713 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz); 2714 } else { /* only copy matrix values into sbuf_a */ 2715 rptr = sbuf_j; 2716 vals = sbuf_a; 2717 rptr[0] = 0; 2718 for (i=0; i<rend-rstart; i++) { 2719 row = i + rstart; 2720 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2721 ncols = nzA + nzB; 2722 cworkB = b->j + b->i[i]; 2723 aworkA = a->a + a->i[i]; 2724 aworkB = b->a + b->i[i]; 2725 lwrite = 0; 2726 for (l=0; l<nzB; l++) { 2727 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2728 } 2729 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2730 for (l=0; l<nzB; l++) { 2731 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2732 } 2733 vals += ncols; 2734 rptr[i+1] = rptr[i] + ncols; 2735 } 2736 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2737 2738 /* send nzlocal to others, and recv others' nzlocal */ 2739 /*--------------------------------------------------*/ 2740 if (reuse == MAT_INITIAL_MATRIX) { 2741 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2742 2743 s_waits2 = s_waits3 + nsends; 2744 s_waits1 = s_waits2 + nsends; 2745 r_waits1 = s_waits1 + nsends; 2746 r_waits2 = r_waits1 + nrecvs; 2747 r_waits3 = r_waits2 + nrecvs; 2748 } else { 2749 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2750 2751 r_waits3 = s_waits3 + nsends; 2752 } 2753 2754 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2755 if (reuse == MAT_INITIAL_MATRIX) { 2756 /* get new tags to keep the communication clean */ 2757 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2758 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2759 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2760 2761 /* post receives of others' nzlocal */ 2762 for (i=0; i<nrecvs; i++) { 2763 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2764 } 2765 /* send nzlocal to others */ 2766 for (i=0; i<nsends; i++) { 2767 sbuf_nz[i] = nzlocal; 2768 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2769 } 2770 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2771 count = nrecvs; 2772 while (count) { 2773 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2774 2775 recv_rank[imdex] = recv_status.MPI_SOURCE; 2776 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2777 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2778 2779 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2780 2781 rbuf_nz[imdex] += i + 2; 2782 2783 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2784 ierr =
MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2785 count--; 2786 } 2787 /* wait on sends of nzlocal */ 2788 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2789 /* send mat->i,j to others, and recv from others */ 2790 /*------------------------------------------------*/ 2791 for (i=0; i<nsends; i++) { 2792 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2793 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2794 } 2795 /* wait on receives of mat->i,j */ 2796 /*------------------------------*/ 2797 count = nrecvs; 2798 while (count) { 2799 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2800 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2801 count--; 2802 } 2803 /* wait on sends of mat->i,j */ 2804 /*---------------------------*/ 2805 if (nsends) { 2806 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2807 } 2808 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2809 2810 /* post receives, send and receive mat->a */ 2811 /*----------------------------------------*/ 2812 for (imdex=0; imdex<nrecvs; imdex++) { 2813 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2814 } 2815 for (i=0; i<nsends; i++) { 2816 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2817 } 2818 count = nrecvs; 2819 while (count) { 2820 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2821 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2822 count--; 2823 } 2824 if (nsends) { 2825 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2826 } 2827 2828 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2829 2830 /* create redundant matrix */ 2831 /*-------------------------*/ 2832 if (reuse == MAT_INITIAL_MATRIX) { 2833 /* compute rownz_max for preallocation */ 2834 for (imdex=0; imdex<nrecvs; imdex++) { 2835 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2836 rptr = rbuf_j[imdex]; 2837 for (i=0; i<j; i++) { 2838 ncols = rptr[i+1] - rptr[i]; 2839 if (rownz_max < ncols) rownz_max = ncols; 2840 } 2841 } 2842 2843 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2844 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2845 /* ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DETERMINE,mat->cmap->N);CHKERRQ(ierr); -- gives an incorrect local column size!
*/ 2846 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2847 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2848 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2849 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2850 } else { 2851 C = *matredundant; 2852 } 2853 2854 /* insert local matrix entries */ 2855 rptr = sbuf_j; 2856 cols = sbuf_j + rend-rstart + 1; 2857 vals = sbuf_a; 2858 for (i=0; i<rend-rstart; i++) { 2859 row = i + rstart; 2860 ncols = rptr[i+1] - rptr[i]; 2861 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2862 vals += ncols; 2863 cols += ncols; 2864 } 2865 /* insert received matrix entries */ 2866 for (imdex=0; imdex<nrecvs; imdex++) { 2867 rstart = rowrange[recv_rank[imdex]]; 2868 rend = rowrange[recv_rank[imdex]+1]; 2869 rptr = rbuf_j[imdex]; 2870 cols = rbuf_j[imdex] + rend-rstart + 1; 2871 vals = rbuf_a[imdex]; 2872 for (i=0; i<rend-rstart; i++) { 2873 row = i + rstart; 2874 ncols = rptr[i+1] - rptr[i]; 2875 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2876 vals += ncols; 2877 cols += ncols; 2878 } 2879 } 2880 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2881 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2882 2883 if (reuse == MAT_INITIAL_MATRIX) { 2884 PetscContainer container; 2885 *matredundant = C; 2886 /* create a supporting struct and attach it to C for reuse */ 2887 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2888 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2889 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2890 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2891 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2892 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2893 2894 redund->nzlocal = nzlocal; 2895 redund->nsends = nsends; 2896 redund->nrecvs = nrecvs; 2897 redund->send_rank = send_rank; 2898 redund->recv_rank = recv_rank; 2899 redund->sbuf_nz = sbuf_nz; 2900 redund->rbuf_nz = rbuf_nz; 2901 redund->sbuf_j = sbuf_j; 2902 redund->sbuf_a = sbuf_a; 2903 redund->rbuf_j = rbuf_j; 2904 redund->rbuf_a = rbuf_a; 2905 2906 redund->Destroy = C->ops->destroy; 2907 C->ops->destroy = MatDestroy_MatRedundant; 2908 } 2909 PetscFunctionReturn(0); 2910 } 2911 2912 #undef __FUNCT__ 2913 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2914 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2915 { 2916 PetscErrorCode ierr; 2917 MPI_Comm comm; 2918 PetscMPIInt size; 2919 2920 PetscFunctionBegin; 2921 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2922 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2923 /* Only MatGetRedundantMatrix_MPIAIJ_interlaced() is written now */ 2924 if (!subcomm || nsubcomm == size) { /* create psubcomm */ 2925 PetscSubcomm psubcomm; 2926 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2927 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2928 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_INTERLACED);CHKERRQ(ierr); 2929 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr); 2930 /* destroy psubcomm? 
*/
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV,
&diagA);CHKERRQ(ierr); 3039 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3040 for (r = 0; r < n; ++r) { 3041 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3042 a[r] = diagA[r]; 3043 idx[r] = cstart + diagIdx[r]; 3044 } else { 3045 a[r] = offdiagA[r]; 3046 idx[r] = cmap[offdiagIdx[r]]; 3047 } 3048 } 3049 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3050 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3051 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3052 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3053 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3054 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3055 PetscFunctionReturn(0); 3056 } 3057 3058 #undef __FUNCT__ 3059 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3060 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3061 { 3062 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3063 PetscInt n = A->rmap->n; 3064 PetscInt cstart = A->cmap->rstart; 3065 PetscInt *cmap = mat->garray; 3066 PetscInt *diagIdx, *offdiagIdx; 3067 Vec diagV, offdiagV; 3068 PetscScalar *a, *diagA, *offdiagA; 3069 PetscInt r; 3070 PetscErrorCode ierr; 3071 3072 PetscFunctionBegin; 3073 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3074 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3075 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3076 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3077 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3078 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3079 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3080 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3081 for (r = 0; r < n; ++r) { 3082 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3083 a[r] = diagA[r]; 3084 idx[r] = cstart + diagIdx[r]; 3085 } else { 3086 a[r] = offdiagA[r]; 3087 idx[r] = cmap[offdiagIdx[r]]; 3088 } 3089 } 3090 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3091 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3092 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3093 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3094 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3095 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3096 PetscFunctionReturn(0); 3097 } 3098 3099 #undef __FUNCT__ 3100 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3101 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3102 { 3103 PetscErrorCode ierr; 3104 Mat *dummy; 3105 3106 PetscFunctionBegin; 3107 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3108 *newmat = *dummy; 3109 ierr = PetscFree(dummy);CHKERRQ(ierr); 3110 PetscFunctionReturn(0); 3111 } 3112 3113 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3114 3115 #undef __FUNCT__ 3116 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3117 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3118 { 3119 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3120 PetscErrorCode ierr; 3121 3122 PetscFunctionBegin; 3123 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3124 PetscFunctionReturn(0); 3125 } 3126 3127 #undef __FUNCT__ 3128 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3129 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3130 { 3131 PetscErrorCode ierr; 3132 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3133 3134 PetscFunctionBegin; 3135 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3136 ierr = 
MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3137 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3138 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3139 PetscFunctionReturn(0); 3140 } 3141 3142 /* -------------------------------------------------------------------*/ 3143 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3144 MatGetRow_MPIAIJ, 3145 MatRestoreRow_MPIAIJ, 3146 MatMult_MPIAIJ, 3147 /* 4*/ MatMultAdd_MPIAIJ, 3148 MatMultTranspose_MPIAIJ, 3149 MatMultTransposeAdd_MPIAIJ, 3150 #if defined(PETSC_HAVE_PBGL) 3151 MatSolve_MPIAIJ, 3152 #else 3153 0, 3154 #endif 3155 0, 3156 0, 3157 /*10*/ 0, 3158 0, 3159 0, 3160 MatSOR_MPIAIJ, 3161 MatTranspose_MPIAIJ, 3162 /*15*/ MatGetInfo_MPIAIJ, 3163 MatEqual_MPIAIJ, 3164 MatGetDiagonal_MPIAIJ, 3165 MatDiagonalScale_MPIAIJ, 3166 MatNorm_MPIAIJ, 3167 /*20*/ MatAssemblyBegin_MPIAIJ, 3168 MatAssemblyEnd_MPIAIJ, 3169 MatSetOption_MPIAIJ, 3170 MatZeroEntries_MPIAIJ, 3171 /*24*/ MatZeroRows_MPIAIJ, 3172 0, 3173 #if defined(PETSC_HAVE_PBGL) 3174 0, 3175 #else 3176 0, 3177 #endif 3178 0, 3179 0, 3180 /*29*/ MatSetUp_MPIAIJ, 3181 #if defined(PETSC_HAVE_PBGL) 3182 0, 3183 #else 3184 0, 3185 #endif 3186 0, 3187 0, 3188 0, 3189 /*34*/ MatDuplicate_MPIAIJ, 3190 0, 3191 0, 3192 0, 3193 0, 3194 /*39*/ MatAXPY_MPIAIJ, 3195 MatGetSubMatrices_MPIAIJ, 3196 MatIncreaseOverlap_MPIAIJ, 3197 MatGetValues_MPIAIJ, 3198 MatCopy_MPIAIJ, 3199 /*44*/ MatGetRowMax_MPIAIJ, 3200 MatScale_MPIAIJ, 3201 0, 3202 0, 3203 MatZeroRowsColumns_MPIAIJ, 3204 /*49*/ MatSetRandom_MPIAIJ, 3205 0, 3206 0, 3207 0, 3208 0, 3209 /*54*/ MatFDColoringCreate_MPIAIJ, 3210 0, 3211 MatSetUnfactored_MPIAIJ, 3212 MatPermute_MPIAIJ, 3213 0, 3214 /*59*/ MatGetSubMatrix_MPIAIJ, 3215 MatDestroy_MPIAIJ, 3216 MatView_MPIAIJ, 3217 0, 3218 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3219 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3220 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3221 0, 3222 0, 3223 0, 3224 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3225 MatGetRowMinAbs_MPIAIJ, 3226 0, 3227 MatSetColoring_MPIAIJ, 3228 0, 3229 MatSetValuesAdifor_MPIAIJ, 3230 /*75*/ MatFDColoringApply_AIJ, 3231 0, 3232 0, 3233 0, 3234 MatFindZeroDiagonals_MPIAIJ, 3235 /*80*/ 0, 3236 0, 3237 0, 3238 /*83*/ MatLoad_MPIAIJ, 3239 0, 3240 0, 3241 0, 3242 0, 3243 0, 3244 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3245 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3246 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3247 MatPtAP_MPIAIJ_MPIAIJ, 3248 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3249 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3250 0, 3251 0, 3252 0, 3253 0, 3254 /*99*/ 0, 3255 0, 3256 0, 3257 MatConjugate_MPIAIJ, 3258 0, 3259 /*104*/MatSetValuesRow_MPIAIJ, 3260 MatRealPart_MPIAIJ, 3261 MatImaginaryPart_MPIAIJ, 3262 0, 3263 0, 3264 /*109*/0, 3265 MatGetRedundantMatrix_MPIAIJ, 3266 MatGetRowMin_MPIAIJ, 3267 0, 3268 0, 3269 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3270 0, 3271 0, 3272 0, 3273 0, 3274 /*119*/0, 3275 0, 3276 0, 3277 0, 3278 MatGetMultiProcBlock_MPIAIJ, 3279 /*124*/MatFindNonzeroRows_MPIAIJ, 3280 MatGetColumnNorms_MPIAIJ, 3281 MatInvertBlockDiagonal_MPIAIJ, 3282 0, 3283 MatGetSubMatricesParallel_MPIAIJ, 3284 /*129*/0, 3285 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3286 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3287 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3288 0, 3289 /*134*/0, 3290 0, 3291 0, 3292 0, 3293 0, 3294 /*139*/0, 3295 0 3296 }; 3297 3298 /* ----------------------------------------------------------------------------------------*/ 3299 3300 #undef __FUNCT__ 3301 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3302 PetscErrorCode 
MatStoreValues_MPIAIJ(Mat mat) 3303 { 3304 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3305 PetscErrorCode ierr; 3306 3307 PetscFunctionBegin; 3308 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3309 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3310 PetscFunctionReturn(0); 3311 } 3312 3313 #undef __FUNCT__ 3314 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3315 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3316 { 3317 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3318 PetscErrorCode ierr; 3319 3320 PetscFunctionBegin; 3321 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3322 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3323 PetscFunctionReturn(0); 3324 } 3325 3326 #undef __FUNCT__ 3327 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3328 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3329 { 3330 Mat_MPIAIJ *b; 3331 PetscErrorCode ierr; 3332 3333 PetscFunctionBegin; 3334 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3335 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3336 b = (Mat_MPIAIJ*)B->data; 3337 3338 if (!B->preallocated) { 3339 /* Explicitly create 2 MATSEQAIJ matrices. */ 3340 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3341 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3342 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3343 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3344 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3345 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3346 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3347 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3348 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3349 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3350 } 3351 3352 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3353 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3354 B->preallocated = PETSC_TRUE; 3355 PetscFunctionReturn(0); 3356 } 3357 3358 #undef __FUNCT__ 3359 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3360 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3361 { 3362 Mat mat; 3363 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3364 PetscErrorCode ierr; 3365 3366 PetscFunctionBegin; 3367 *newmat = 0; 3368 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3369 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3370 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3371 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3372 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3373 a = (Mat_MPIAIJ*)mat->data; 3374 3375 mat->factortype = matin->factortype; 3376 mat->rmap->bs = matin->rmap->bs; 3377 mat->cmap->bs = matin->cmap->bs; 3378 mat->assembled = PETSC_TRUE; 3379 mat->insertmode = NOT_SET_VALUES; 3380 mat->preallocated = PETSC_TRUE; 3381 3382 a->size = oldmat->size; 3383 a->rank = oldmat->rank; 3384 a->donotstash = oldmat->donotstash; 3385 a->roworiented = oldmat->roworiented; 3386 a->rowindices = 0; 3387 a->rowvalues = 0; 3388 a->getrowactive = PETSC_FALSE; 3389 3390 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3391 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3392 3393 if (oldmat->colmap) { 3394 #if defined(PETSC_USE_CTABLE) 3395 ierr = 
PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3396 #else 3397 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3398 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3399 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3400 #endif 3401 } else a->colmap = 0; 3402 if (oldmat->garray) { 3403 PetscInt len; 3404 len = oldmat->B->cmap->n; 3405 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3406 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3407 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3408 } else a->garray = 0; 3409 3410 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3411 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3412 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3413 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3414 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3415 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3416 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3417 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3418 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3419 *newmat = mat; 3420 PetscFunctionReturn(0); 3421 } 3422 3423 3424 3425 #undef __FUNCT__ 3426 #define __FUNCT__ "MatLoad_MPIAIJ" 3427 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3428 { 3429 PetscScalar *vals,*svals; 3430 MPI_Comm comm; 3431 PetscErrorCode ierr; 3432 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3433 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3434 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3435 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3436 PetscInt cend,cstart,n,*rowners,sizesset=1; 3437 int fd; 3438 PetscInt bs = 1; 3439 3440 PetscFunctionBegin; 3441 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3442 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3443 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3444 if (!rank) { 3445 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3446 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3447 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3448 } 3449 3450 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3451 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3452 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3453 3454 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3455 3456 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3457 M = header[1]; N = header[2]; 3458 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3459 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3460 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3461 3462 /* If global sizes are set, check if they are consistent with that given in the file */ 3463 if (sizesset) { 3464 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3465 } 3466 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has 
(%d)",M,grows); 3467 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3468 3469 /* determine ownership of all (block) rows */ 3470 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3471 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3472 else m = newMat->rmap->n; /* Set by user */ 3473 3474 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3475 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3476 3477 /* First process needs enough room for process with most rows */ 3478 if (!rank) { 3479 mmax = rowners[1]; 3480 for (i=2; i<=size; i++) { 3481 mmax = PetscMax(mmax, rowners[i]); 3482 } 3483 } else mmax = -1; /* unused, but compilers complain */ 3484 3485 rowners[0] = 0; 3486 for (i=2; i<=size; i++) { 3487 rowners[i] += rowners[i-1]; 3488 } 3489 rstart = rowners[rank]; 3490 rend = rowners[rank+1]; 3491 3492 /* distribute row lengths to all processors */ 3493 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3494 if (!rank) { 3495 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3496 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3497 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3498 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3499 for (j=0; j<m; j++) { 3500 procsnz[0] += ourlens[j]; 3501 } 3502 for (i=1; i<size; i++) { 3503 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3504 /* calculate the number of nonzeros on each processor */ 3505 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3506 procsnz[i] += rowlengths[j]; 3507 } 3508 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3509 } 3510 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3511 } else { 3512 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3513 } 3514 3515 if (!rank) { 3516 /* determine max buffer needed and allocate it */ 3517 maxnz = 0; 3518 for (i=0; i<size; i++) { 3519 maxnz = PetscMax(maxnz,procsnz[i]); 3520 } 3521 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3522 3523 /* read in my part of the matrix column indices */ 3524 nz = procsnz[0]; 3525 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3526 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3527 3528 /* read in every one elses and ship off */ 3529 for (i=1; i<size; i++) { 3530 nz = procsnz[i]; 3531 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3532 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3533 } 3534 ierr = PetscFree(cols);CHKERRQ(ierr); 3535 } else { 3536 /* determine buffer space needed for message */ 3537 nz = 0; 3538 for (i=0; i<m; i++) { 3539 nz += ourlens[i]; 3540 } 3541 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3542 3543 /* receive message of column indices*/ 3544 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3545 } 3546 3547 /* determine column ownership if matrix is not square */ 3548 if (N != M) { 3549 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3550 else n = newMat->cmap->n; 3551 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3552 cstart = cend - n; 3553 } else { 3554 cstart = rstart; 3555 cend = rend; 3556 n = cend - 
cstart; 3557 } 3558 3559 /* loop over local rows, determining number of off diagonal entries */ 3560 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3561 jj = 0; 3562 for (i=0; i<m; i++) { 3563 for (j=0; j<ourlens[i]; j++) { 3564 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3565 jj++; 3566 } 3567 } 3568 3569 for (i=0; i<m; i++) { 3570 ourlens[i] -= offlens[i]; 3571 } 3572 if (!sizesset) { 3573 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3574 } 3575 3576 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3577 3578 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3579 3580 for (i=0; i<m; i++) { 3581 ourlens[i] += offlens[i]; 3582 } 3583 3584 if (!rank) { 3585 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3586 3587 /* read in my part of the matrix numerical values */ 3588 nz = procsnz[0]; 3589 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3590 3591 /* insert into matrix */ 3592 jj = rstart; 3593 smycols = mycols; 3594 svals = vals; 3595 for (i=0; i<m; i++) { 3596 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3597 smycols += ourlens[i]; 3598 svals += ourlens[i]; 3599 jj++; 3600 } 3601 3602 /* read in other processors and ship out */ 3603 for (i=1; i<size; i++) { 3604 nz = procsnz[i]; 3605 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3606 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3607 } 3608 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3609 } else { 3610 /* receive numeric values */ 3611 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3612 3613 /* receive message of values*/ 3614 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3615 3616 /* insert into matrix */ 3617 jj = rstart; 3618 smycols = mycols; 3619 svals = vals; 3620 for (i=0; i<m; i++) { 3621 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3622 smycols += ourlens[i]; 3623 svals += ourlens[i]; 3624 jj++; 3625 } 3626 } 3627 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3628 ierr = PetscFree(vals);CHKERRQ(ierr); 3629 ierr = PetscFree(mycols);CHKERRQ(ierr); 3630 ierr = PetscFree(rowners);CHKERRQ(ierr); 3631 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3632 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3633 PetscFunctionReturn(0); 3634 } 3635 3636 #undef __FUNCT__ 3637 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3638 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3639 { 3640 PetscErrorCode ierr; 3641 IS iscol_local; 3642 PetscInt csize; 3643 3644 PetscFunctionBegin; 3645 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3646 if (call == MAT_REUSE_MATRIX) { 3647 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3648 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3649 } else { 3650 PetscInt cbs; 3651 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3652 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3653 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3654 } 3655 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3656 if (call == MAT_INITIAL_MATRIX) { 3657 ierr = 
PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3658 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3659 } 3660 PetscFunctionReturn(0); 3661 } 3662 3663 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3664 #undef __FUNCT__ 3665 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3666 /* 3667 Not great since it makes two copies of the submatrix, first an SeqAIJ 3668 in local and then by concatenating the local matrices the end result. 3669 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3670 3671 Note: This requires a sequential iscol with all indices. 3672 */ 3673 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3674 { 3675 PetscErrorCode ierr; 3676 PetscMPIInt rank,size; 3677 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3678 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3679 PetscBool allcolumns, colflag; 3680 Mat M,Mreuse; 3681 MatScalar *vwork,*aa; 3682 MPI_Comm comm; 3683 Mat_SeqAIJ *aij; 3684 3685 PetscFunctionBegin; 3686 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3687 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3688 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3689 3690 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3691 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3692 if (colflag && ncol == mat->cmap->N) { 3693 allcolumns = PETSC_TRUE; 3694 } else { 3695 allcolumns = PETSC_FALSE; 3696 } 3697 if (call == MAT_REUSE_MATRIX) { 3698 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3699 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3700 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3701 } else { 3702 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3703 } 3704 3705 /* 3706 m - number of local rows 3707 n - number of columns (same on all processors) 3708 rstart - first row in new global matrix generated 3709 */ 3710 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3711 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3712 if (call == MAT_INITIAL_MATRIX) { 3713 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3714 ii = aij->i; 3715 jj = aij->j; 3716 3717 /* 3718 Determine the number of non-zeros in the diagonal and off-diagonal 3719 portions of the matrix in order to do correct preallocation 3720 */ 3721 3722 /* first get start and end of "diagonal" columns */ 3723 if (csize == PETSC_DECIDE) { 3724 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3725 if (mglobal == n) { /* square matrix */ 3726 nlocal = m; 3727 } else { 3728 nlocal = n/size + ((n % size) > rank); 3729 } 3730 } else { 3731 nlocal = csize; 3732 } 3733 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3734 rstart = rend - nlocal; 3735 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3736 3737 /* next, compute all the lengths */ 3738 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3739 olens = dlens + m; 3740 for (i=0; i<m; i++) { 3741 jend = ii[i+1] - ii[i]; 3742 olen = 0; 3743 dlen = 0; 3744 for (j=0; j<jend; j++) { 3745 if (*jj < rstart || *jj >= rend) olen++; 3746 else dlen++; 3747 jj++; 3748 
}
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save the submatrix used on this process for the next reuse request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
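/*
   A minimal calling sketch for MatMPIAIJSetPreallocationCSR() (illustration only, not part of
   the library source; the names comm, mlocal, Nglobal, ilocal, jlocal, and vlocal are assumed
   to be provided by the caller, with ilocal/jlocal/vlocal holding this process's rows in the
   CSR layout described in the man page above):

     Mat B;
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,mlocal,PETSC_DECIDE,PETSC_DETERMINE,Nglobal);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,ilocal,jlocal,vlocal);CHKERRQ(ierr);

   On return B is preallocated and, because values were passed, already filled and assembled.
*/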
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square.  The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
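   The arrays d_nnz and o_nnz can be computed directly from the intended sparsity
   pattern; a minimal sketch (the per-row counts ncols[i] and global column lists
   cols[i][] are hypothetical names for the caller's pattern, and cstart/cend delimit
   this process's diagonal-block columns, exactly as in the CSR preallocation
   routine above):

.vb
     for (i=0; i<m; i++) {
       d_nnz[i] = 0; o_nnz[i] = 0;
       for (j=0; j<ncols[i]; j++) {
         if (cstart <= cols[i][j] && cols[i][j] < cend) d_nnz[i]++;
         else                                           o_nnz[i]++;
       }
     }
.ve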
   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
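/*
   Typical call sequence for MatMPIAIJSetPreallocation() (a sketch, not part of the library
   source; comm, m, n, M, N and the d_nnz/o_nnz arrays are assumed to have been chosen as
   described in the man page above):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     ... MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/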
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
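/*
   Sketch of building the two-process example from the man page above with
   MatCreateMPIAIJWithArrays() (illustration only, not part of the library source;
   on P0 mlocal=2 with i={0,1,3}, j={0,0,2}, v={1,2,3}, and on P1 mlocal=1 with
   i={0,3}, j={0,1,2}, v={4,5,6}):

     Mat A;
     ierr = MatCreateMPIAIJWithArrays(comm,mlocal,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);

   Each process passes only its own rows; since the arrays are copied, they may be
   freed (or modified) once the call returns.
*/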
#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.
        Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence pre-allocation is perfect.
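   A call creating the matrix above could thus look as follows on each process
   (a sketch; m, n and the d_nnz/o_nnz arrays take the per-process values listed
   above, and error checking is omitted):

.vb
     Mat A;
     MatCreateAIJ(comm,m,n,8,8,0,d_nnz,0,o_nnz,&A);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve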
4301 4302 Level: intermediate 4303 4304 .keywords: matrix, aij, compressed row, sparse, parallel 4305 4306 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4307 MPIAIJ, MatCreateMPIAIJWithArrays() 4308 @*/ 4309 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4310 { 4311 PetscErrorCode ierr; 4312 PetscMPIInt size; 4313 4314 PetscFunctionBegin; 4315 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4316 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4317 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4318 if (size > 1) { 4319 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4320 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4321 } else { 4322 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4323 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4324 } 4325 PetscFunctionReturn(0); 4326 } 4327 4328 #undef __FUNCT__ 4329 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4330 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4331 { 4332 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4333 4334 PetscFunctionBegin; 4335 *Ad = a->A; 4336 *Ao = a->B; 4337 *colmap = a->garray; 4338 PetscFunctionReturn(0); 4339 } 4340 4341 #undef __FUNCT__ 4342 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4343 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4344 { 4345 PetscErrorCode ierr; 4346 PetscInt i; 4347 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4348 4349 PetscFunctionBegin; 4350 if (coloring->ctype == IS_COLORING_GLOBAL) { 4351 ISColoringValue *allcolors,*colors; 4352 ISColoring ocoloring; 4353 4354 /* set coloring for diagonal portion */ 4355 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4356 4357 /* set coloring for off-diagonal portion */ 4358 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4359 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4360 for (i=0; i<a->B->cmap->n; i++) { 4361 colors[i] = allcolors[a->garray[i]]; 4362 } 4363 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4364 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4365 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4366 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4367 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4368 ISColoringValue *colors; 4369 PetscInt *larray; 4370 ISColoring ocoloring; 4371 4372 /* set coloring for diagonal portion */ 4373 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4374 for (i=0; i<a->A->cmap->n; i++) { 4375 larray[i] = i + A->cmap->rstart; 4376 } 4377 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4378 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4379 for (i=0; i<a->A->cmap->n; i++) { 4380 colors[i] = coloring->colors[larray[i]]; 4381 } 4382 ierr = PetscFree(larray);CHKERRQ(ierr); 4383 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4384 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4385 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4386 4387 /* set coloring for off-diagonal portion */ 4388 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4389 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4390 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4391 for (i=0; i<a->B->cmap->n; i++) { 4392 colors[i] = coloring->colors[larray[i]]; 4393 } 4394 ierr = PetscFree(larray);CHKERRQ(ierr); 4395 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4396 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4397 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4398 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype); 4399 PetscFunctionReturn(0); 4400 } 4401 4402 #undef __FUNCT__ 4403 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4404 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4405 { 4406 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4407 PetscErrorCode ierr; 4408 4409 PetscFunctionBegin; 4410 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4411 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4412 PetscFunctionReturn(0); 4413 } 4414 4415 #undef __FUNCT__ 4416 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4417 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4418 { 4419 PetscErrorCode ierr; 4420 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4421 PetscInt *indx; 4422 4423 PetscFunctionBegin; 4424 /* This routine will ONLY return an MPIAIJ type matrix */ 4425 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4426 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4427 if (n == PETSC_DECIDE) { 4428 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4429 } 4430 /* Check sum(n) = N */ 4431 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4432 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4433 4434 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4435 rstart -= m; 4436 4437 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4438 for (i=0; i<m; i++) { 4439 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4440 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4441 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4442 } 4443 4444 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4445 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4446 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4447 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4448 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4449 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4450 PetscFunctionReturn(0); 4451 } 4452 4453 #undef __FUNCT__ 4454 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4455 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4456 { 4457 PetscErrorCode ierr; 4458 PetscInt m,N,i,rstart,nnz,Ii; 4459 PetscInt *indx; 4460 PetscScalar *values; 4461 4462 PetscFunctionBegin; 4463 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4464 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4465 for (i=0; i<m; i++) { 4466 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4467 Ii = i + rstart; 4468 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4469 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4470 } 4471 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4472 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4473 PetscFunctionReturn(0); 4474 } 4475 4476 #undef __FUNCT__ 4477 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4478 /*@ 4479 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4480 matrices from each processor 4481 4482 Collective on MPI_Comm 4483 4484 Input Parameters: 4485 + comm - the communicator the parallel matrix will live on 4486 . inmat - the input sequential matrix (one per process) 4487 . n - number of local columns (or PETSC_DECIDE) 4488 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4489 4490 Output Parameter: 4491 . outmat - the parallel matrix generated 4492 4493 Level: advanced 4494 4495 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4496 4497 @*/ 4498 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4499 { 4500 PetscErrorCode ierr; 4501 4502 PetscFunctionBegin; 4503 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4504 if (scall == MAT_INITIAL_MATRIX) { 4505 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4506 } 4507 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4508 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4509 PetscFunctionReturn(0); 4510 } 4511 4512 #undef __FUNCT__ 4513 #define __FUNCT__ "MatFileSplit" 4514 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4515 { 4516 PetscErrorCode ierr; 4517 PetscMPIInt rank; 4518 PetscInt m,N,i,rstart,nnz; 4519 size_t len; 4520 const PetscInt *indx; 4521 PetscViewer out; 4522 char *name; 4523 Mat B; 4524 const PetscScalar *values; 4525 4526 PetscFunctionBegin; 4527 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4528 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4529 /* Should this be the type of the diagonal block of A? 
*/ 4530 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4531 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4532 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4533 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4534 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4535 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4536 for (i=0; i<m; i++) { 4537 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4538 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4539 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4540 } 4541 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4542 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4543 4544 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4545 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4546 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4547 sprintf(name,"%s.%d",outfile,rank); 4548 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4549 ierr = PetscFree(name);CHKERRQ(ierr); 4550 ierr = MatView(B,out);CHKERRQ(ierr); 4551 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4552 ierr = MatDestroy(&B);CHKERRQ(ierr); 4553 PetscFunctionReturn(0); 4554 } 4555 4556 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4557 #undef __FUNCT__ 4558 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4559 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4560 { 4561 PetscErrorCode ierr; 4562 Mat_Merge_SeqsToMPI *merge; 4563 PetscContainer container; 4564 4565 PetscFunctionBegin; 4566 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4567 if (container) { 4568 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4569 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4570 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4571 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4572 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4573 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4574 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4575 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4576 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4577 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4578 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4579 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4580 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4581 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4582 ierr = PetscFree(merge);CHKERRQ(ierr); 4583 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4584 } 4585 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4586 PetscFunctionReturn(0); 4587 } 4588 4589 #include <../src/mat/utils/freespace.h> 4590 #include <petscbt.h> 4591 4592 #undef __FUNCT__ 4593 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4594 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4595 { 4596 PetscErrorCode ierr; 4597 MPI_Comm comm; 4598 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4599 PetscMPIInt size,rank,taga,*len_s; 4600 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4601 PetscInt proc,m; 4602 PetscInt **buf_ri,**buf_rj; 4603 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4604 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4605 MPI_Request *s_waits,*r_waits; 4606 MPI_Status *status; 4607 MatScalar *aa=a->a; 4608 MatScalar **abuf_r,*ba_i; 4609 Mat_Merge_SeqsToMPI *merge; 4610 PetscContainer container; 4611 4612 PetscFunctionBegin; 4613 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4614 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4615 4616 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4617 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4618 4619 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4620 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4621 4622 bi = merge->bi; 4623 bj = merge->bj; 4624 buf_ri = merge->buf_ri; 4625 buf_rj = merge->buf_rj; 4626 4627 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4628 owners = merge->rowmap->range; 4629 len_s = merge->len_s; 4630 4631 /* send and recv matrix values */ 4632 /*-----------------------------*/ 4633 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4634 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4635 4636 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4637 for (proc=0,k=0; proc<size; proc++) { 4638 if (!len_s[proc]) continue; 4639 i = owners[proc]; 4640 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4641 k++; 4642 } 4643 4644 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4645 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4646 ierr = PetscFree(status);CHKERRQ(ierr); 4647 4648 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4649 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4650 4651 /* insert mat values of mpimat */ 4652 /*----------------------------*/ 4653 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4654 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4655 4656 for (k=0; k<merge->nrecv; k++) { 4657 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4658 nrows = *(buf_ri_k[k]); 4659 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4660 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4661 } 4662 4663 /* set values of ba */ 4664 m = merge->rowmap->n; 4665 for (i=0; i<m; i++) { 4666 arow = owners[rank] + i; 4667 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4668 bnzi = bi[i+1] - bi[i]; 4669 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4670 4671 /* add local non-zero vals of this proc's seqmat into ba */ 4672 anzi = ai[arow+1] - ai[arow]; 4673 aj = a->j + ai[arow]; 4674 aa = a->a + ai[arow]; 4675 nextaj = 0; 4676 for (j=0; nextaj<anzi; j++) { 4677 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4678 ba_i[j] += aa[nextaj++]; 4679 } 4680 } 4681 4682 /* add received vals into ba */ 4683 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4684 /* i-th row */ 4685 if (i == *nextrow[k]) { 4686 anzi = *(nextai[k]+1) - *nextai[k]; 4687 aj = buf_rj[k] + *(nextai[k]); 4688 aa = abuf_r[k] + *(nextai[k]); 4689 nextaj = 0; 4690 for (j=0; nextaj<anzi; j++) { 4691 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4692 ba_i[j] += aa[nextaj++]; 4693 } 4694 } 4695 nextrow[k]++; nextai[k]++; 4696 } 4697 } 4698 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4699 } 4700 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4701 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4702 4703 ierr = 
PetscFree(abuf_r[0]);CHKERRQ(ierr); 4704 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4705 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4706 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4707 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4708 PetscFunctionReturn(0); 4709 } 4710 4711 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4712 4713 #undef __FUNCT__ 4714 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4715 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4716 { 4717 PetscErrorCode ierr; 4718 Mat B_mpi; 4719 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4720 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4721 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4722 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4723 PetscInt len,proc,*dnz,*onz,bs,cbs; 4724 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4725 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4726 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4727 MPI_Status *status; 4728 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4729 PetscBT lnkbt; 4730 Mat_Merge_SeqsToMPI *merge; 4731 PetscContainer container; 4732 4733 PetscFunctionBegin; 4734 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4735 4736 /* make sure it is a PETSc comm */ 4737 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4738 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4739 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4740 4741 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4742 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4743 4744 /* determine row ownership */ 4745 /*---------------------------------------------------------*/ 4746 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4747 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4748 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4749 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4750 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4751 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4752 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4753 4754 m = merge->rowmap->n; 4755 owners = merge->rowmap->range; 4756 4757 /* determine the number of messages to send, their lengths */ 4758 /*---------------------------------------------------------*/ 4759 len_s = merge->len_s; 4760 4761 len = 0; /* length of buf_si[] */ 4762 merge->nsend = 0; 4763 for (proc=0; proc<size; proc++) { 4764 len_si[proc] = 0; 4765 if (proc == rank) { 4766 len_s[proc] = 0; 4767 } else { 4768 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4769 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */ 4770 } 4771 if (len_s[proc]) { 4772 merge->nsend++; 4773 nrows = 0; 4774 for (i=owners[proc]; i<owners[proc+1]; i++) { 4775 if (ai[i+1] > ai[i]) nrows++; 4776 } 4777 len_si[proc] = 2*(nrows+1); 4778 len += len_si[proc]; 4779 } 4780 } 4781 4782 /* determine the number and length of messages to receive for ij-structure */ 4783 /*-------------------------------------------------------------------------*/ 4784 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4785 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4786 4787 /* post the Irecv of j-structure */ 4788 
/*-------------------------------*/ 4789 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4790 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4791 4792 /* post the Isend of j-structure */ 4793 /*--------------------------------*/ 4794 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4795 4796 for (proc=0, k=0; proc<size; proc++) { 4797 if (!len_s[proc]) continue; 4798 i = owners[proc]; 4799 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4800 k++; 4801 } 4802 4803 /* receives and sends of j-structure are complete */ 4804 /*------------------------------------------------*/ 4805 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4806 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4807 4808 /* send and recv i-structure */ 4809 /*---------------------------*/ 4810 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4811 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4812 4813 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4814 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4815 for (proc=0,k=0; proc<size; proc++) { 4816 if (!len_s[proc]) continue; 4817 /* form outgoing message for i-structure: 4818 buf_si[0]: nrows to be sent 4819 [1:nrows]: row index (local to the receiving process) 4820 [nrows+1:2*nrows+1]: i-structure index 4821 */ 4822 /*-------------------------------------------*/ 4823 nrows = len_si[proc]/2 - 1; 4824 buf_si_i = buf_si + nrows+1; 4825 buf_si[0] = nrows; 4826 buf_si_i[0] = 0; 4827 nrows = 0; 4828 for (i=owners[proc]; i<owners[proc+1]; i++) { 4829 anzi = ai[i+1] - ai[i]; 4830 if (anzi) { 4831 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4832 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4833 nrows++; 4834 } 4835 } 4836 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4837 k++; 4838 buf_si += len_si[proc]; 4839 } 4840 4841 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4842 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4843 4844 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4845 for (i=0; i<merge->nrecv; i++) { 4846 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4847 } 4848 4849 ierr = PetscFree(len_si);CHKERRQ(ierr); 4850 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4851 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4852 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4853 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4854 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4855 ierr = PetscFree(status);CHKERRQ(ierr); 4856 4857 /* compute a local seq matrix in each processor */ 4858 /*----------------------------------------------*/ 4859 /* allocate bi array and free space for accumulating nonzero column info */ 4860 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4861 bi[0] = 0; 4862 4863 /* create and initialize a linked list */ 4864 nlnk = N+1; 4865 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4866 4867 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4868 len = ai[owners[rank+1]] - ai[owners[rank]]; 4869 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4870 4871 
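/* Each local row below is formed by merging this process's row of seqmat with the corresponding received rows: column indices are accumulated in the sorted linked list lnk, copied into free_space, and counted into dnz/onz for the preallocation of B_mpi. */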
current_space = free_space; 4872 4873 /* determine symbolic info for each local row */ 4874 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4875 4876 for (k=0; k<merge->nrecv; k++) { 4877 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4878 nrows = *buf_ri_k[k]; 4879 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4880 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4881 } 4882 4883 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4884 len = 0; 4885 for (i=0; i<m; i++) { 4886 bnzi = 0; 4887 /* add local non-zero cols of this proc's seqmat into lnk */ 4888 arow = owners[rank] + i; 4889 anzi = ai[arow+1] - ai[arow]; 4890 aj = a->j + ai[arow]; 4891 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4892 bnzi += nlnk; 4893 /* add received col data into lnk */ 4894 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4895 if (i == *nextrow[k]) { /* i-th row */ 4896 anzi = *(nextai[k]+1) - *nextai[k]; 4897 aj = buf_rj[k] + *nextai[k]; 4898 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4899 bnzi += nlnk; 4900 nextrow[k]++; nextai[k]++; 4901 } 4902 } 4903 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4904 4905 /* if free space is not available, make more free space */ 4906 if (current_space->local_remaining<bnzi) { 4907 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4908 nspacedouble++; 4909 } 4910 /* copy data into free space, then initialize lnk */ 4911 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4912 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4913 4914 current_space->array += bnzi; 4915 current_space->local_used += bnzi; 4916 current_space->local_remaining -= bnzi; 4917 4918 bi[i+1] = bi[i] + bnzi; 4919 } 4920 4921 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4922 4923 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4924 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4925 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4926 4927 /* create symbolic parallel matrix B_mpi */ 4928 /*---------------------------------------*/ 4929 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4930 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4931 if (n==PETSC_DECIDE) { 4932 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4933 } else { 4934 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4935 } 4936 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4937 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4938 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4939 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4940 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4941 4942 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4943 B_mpi->assembled = PETSC_FALSE; 4944 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4945 merge->bi = bi; 4946 merge->bj = bj; 4947 merge->buf_ri = buf_ri; 4948 merge->buf_rj = buf_rj; 4949 merge->coi = NULL; 4950 merge->coj = NULL; 4951 merge->owners_co = NULL; 4952 4953 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4954 4955 /* attach the supporting struct to B_mpi for reuse */ 4956 ierr = 
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4957 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4958 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4959 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4960 *mpimat = B_mpi; 4961 4962 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4963 PetscFunctionReturn(0); 4964 } 4965 4966 #undef __FUNCT__ 4967 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4968 /*@C 4969 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential 4970 matrices from each processor 4971 4972 Collective on MPI_Comm 4973 4974 Input Parameters: 4975 + comm - the communicator the parallel matrix will live on 4976 . seqmat - the input sequential matrix (one per process) 4977 . m - number of local rows (or PETSC_DECIDE) 4978 . n - number of local columns (or PETSC_DECIDE) 4979 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4980 4981 Output Parameter: 4982 . mpimat - the parallel matrix generated 4983 4984 Level: advanced 4985 4986 Notes: 4987 The dimensions of the sequential matrix in each processor MUST be the same. 4988 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4989 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4990 @*/ 4991 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4992 { 4993 PetscErrorCode ierr; 4994 PetscMPIInt size; 4995 4996 PetscFunctionBegin; 4997 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4998 if (size == 1) { 4999 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5000 if (scall == MAT_INITIAL_MATRIX) { 5001 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5002 } else { 5003 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5004 } 5005 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5006 PetscFunctionReturn(0); 5007 } 5008 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5009 if (scall == MAT_INITIAL_MATRIX) { 5010 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5011 } 5012 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5013 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5014 PetscFunctionReturn(0); 5015 } 5016 5017 #undef __FUNCT__ 5018 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5019 /*@ 5020 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5021 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5022 with MatGetSize() 5023 5024 Not Collective 5025 5026 Input Parameters: 5027 + A - the matrix 5028 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5029 5030 Output Parameter: 5031 . 
A_loc - the local sequential matrix generated 5032 5033 Level: developer 5034 5035 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5036 5037 @*/ 5038 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5039 { 5040 PetscErrorCode ierr; 5041 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5042 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5043 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5044 MatScalar *aa=a->a,*ba=b->a,*cam; 5045 PetscScalar *ca; 5046 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5047 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5048 PetscBool match; 5049 5050 PetscFunctionBegin; 5051 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5052 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5053 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5054 if (scall == MAT_INITIAL_MATRIX) { 5055 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5056 ci[0] = 0; 5057 for (i=0; i<am; i++) { 5058 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5059 } 5060 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5061 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5062 k = 0; 5063 for (i=0; i<am; i++) { 5064 ncols_o = bi[i+1] - bi[i]; 5065 ncols_d = ai[i+1] - ai[i]; 5066 /* off-diagonal portion of A */ 5067 for (jo=0; jo<ncols_o; jo++) { 5068 col = cmap[*bj]; 5069 if (col >= cstart) break; 5070 cj[k] = col; bj++; 5071 ca[k++] = *ba++; 5072 } 5073 /* diagonal portion of A */ 5074 for (j=0; j<ncols_d; j++) { 5075 cj[k] = cstart + *aj++; 5076 ca[k++] = *aa++; 5077 } 5078 /* off-diagonal portion of A */ 5079 for (j=jo; j<ncols_o; j++) { 5080 cj[k] = cmap[*bj++]; 5081 ca[k++] = *ba++; 5082 } 5083 } 5084 /* put together the new matrix */ 5085 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5086 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5087 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5088 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5089 mat->free_a = PETSC_TRUE; 5090 mat->free_ij = PETSC_TRUE; 5091 mat->nonew = 0; 5092 } else if (scall == MAT_REUSE_MATRIX) { 5093 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5094 ci = mat->i; cj = mat->j; cam = mat->a; 5095 for (i=0; i<am; i++) { 5096 /* off-diagonal portion of A */ 5097 ncols_o = bi[i+1] - bi[i]; 5098 for (jo=0; jo<ncols_o; jo++) { 5099 col = cmap[*bj]; 5100 if (col >= cstart) break; 5101 *cam++ = *ba++; bj++; 5102 } 5103 /* diagonal portion of A */ 5104 ncols_d = ai[i+1] - ai[i]; 5105 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5106 /* off-diagonal portion of A */ 5107 for (j=jo; j<ncols_o; j++) { 5108 *cam++ = *ba++; bj++; 5109 } 5110 } 5111 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5112 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5113 PetscFunctionReturn(0); 5114 } 5115 5116 #undef __FUNCT__ 5117 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5118 /*@C 5119 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5120 5121 Not Collective 5122 5123 Input Parameters: 5124 + A - the matrix 5125 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5126 - row, col - index sets of rows and columns to extract (or NULL) 5127 5128 Output Parameter: 5129 . 
A_loc - the local sequential matrix generated 5130 5131 Level: developer 5132 5133 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5134 5135 @*/ 5136 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5137 { 5138 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5139 PetscErrorCode ierr; 5140 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5141 IS isrowa,iscola; 5142 Mat *aloc; 5143 PetscBool match; 5144 5145 PetscFunctionBegin; 5146 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5147 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5148 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5149 if (!row) { 5150 start = A->rmap->rstart; end = A->rmap->rend; 5151 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5152 } else { 5153 isrowa = *row; 5154 } 5155 if (!col) { 5156 start = A->cmap->rstart; 5157 cmap = a->garray; 5158 nzA = a->A->cmap->n; 5159 nzB = a->B->cmap->n; 5160 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5161 ncols = 0; 5162 for (i=0; i<nzB; i++) { 5163 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5164 else break; 5165 } 5166 imark = i; 5167 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5168 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5169 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5170 } else { 5171 iscola = *col; 5172 } 5173 if (scall != MAT_INITIAL_MATRIX) { 5174 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5175 aloc[0] = *A_loc; 5176 } 5177 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5178 *A_loc = aloc[0]; 5179 ierr = PetscFree(aloc);CHKERRQ(ierr); 5180 if (!row) { 5181 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5182 } 5183 if (!col) { 5184 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5185 } 5186 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5187 PetscFunctionReturn(0); 5188 } 5189 5190 #undef __FUNCT__ 5191 #define __FUNCT__ "MatGetBrowsOfAcols" 5192 /*@C 5193 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A 5194 5195 Collective on Mat 5196 5197 Input Parameters: 5198 + A,B - the matrices in mpiaij format 5199 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5200 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5201 5202 Output Parameter: 5203 + rowb, colb - index sets of rows and columns of B to extract 5204 - B_seq - the sequential matrix generated 5205 5206 Level: developer 5207 5208 @*/ 5209 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5210 { 5211 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5212 PetscErrorCode ierr; 5213 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5214 IS isrowb,iscolb; 5215 Mat *bseq=NULL; 5216 5217 PetscFunctionBegin; 5218 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5219 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5220 } 5221 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5222 5223 if (scall == MAT_INITIAL_MATRIX) { 5224 start = A->cmap->rstart; 5225 cmap = a->garray; 5226 nzA = a->A->cmap->n; 5227 nzB = a->B->cmap->n; 5228 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5229 ncols = 0; 5230 for (i=0; i<nzB; i++) { /* row < local row index */ 5231 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5232 else break; 5233 } 5234 imark = i; 5235 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5236 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5237 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5238 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5239 } else { 5240 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5241 isrowb = *rowb; iscolb = *colb; 5242 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5243 bseq[0] = *B_seq; 5244 } 5245 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5246 *B_seq = bseq[0]; 5247 ierr = PetscFree(bseq);CHKERRQ(ierr); 5248 if (!rowb) { 5249 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5250 } else { 5251 *rowb = isrowb; 5252 } 5253 if (!colb) { 5254 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5255 } else { 5256 *colb = iscolb; 5257 } 5258 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5259 PetscFunctionReturn(0); 5260 } 5261 5262 #undef __FUNCT__ 5263 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5264 /* 5265 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 5266 of the OFF-DIAGONAL portion of the local part of A 5267 5268 Collective on Mat 5269 5270 Input Parameters: 5271 + A,B - the matrices in mpiaij format 5272 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5273 5274 Output Parameter: 5275 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5276 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5277 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5278 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5279 5280 Level: developer 5281 5282 */ 5283 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5284 { 5285 VecScatter_MPI_General *gen_to,*gen_from; 5286 PetscErrorCode ierr; 5287 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5288 Mat_SeqAIJ *b_oth; 5289 VecScatter ctx =a->Mvctx; 5290 MPI_Comm comm; 5291 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5292 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5293 PetscScalar *rvalues,*svalues; 5294 MatScalar *b_otha,*bufa,*bufA; 5295 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5296 MPI_Request *rwaits = NULL,*swaits = NULL; 5297 MPI_Status *sstatus,rstatus; 5298 PetscMPIInt jj; 5299 PetscInt *cols,sbs,rbs; 5300 PetscScalar *vals; 5301 5302 PetscFunctionBegin; 5303 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5304 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5305 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5306 } 5307 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5308 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5309 5310 gen_to = (VecScatter_MPI_General*)ctx->todata; 5311 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5312 rvalues = gen_from->values; /* holds the length of receiving row */ 5313 svalues = gen_to->values; /* holds the length of sending row */ 5314 nrecvs = gen_from->n; 5315 nsends = gen_to->n; 5316 5317 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5318 srow = gen_to->indices; /* local row index to be sent */ 5319 sstarts = gen_to->starts; 5320 sprocs = gen_to->procs; 5321 sstatus = gen_to->sstatus; 5322 sbs = gen_to->bs; 5323 rstarts = gen_from->starts; 5324 rprocs = gen_from->procs; 5325 rbs = gen_from->bs; 5326 5327 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5328 if (scall == MAT_INITIAL_MATRIX) { 5329 /* i-array */ 5330 /*---------*/ 5331 /* post receives */ 5332 for (i=0; i<nrecvs; i++) { 5333 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5334 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5335 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5336 } 5337 5338 /* pack the outgoing message */ 5339 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5340 5341 sstartsj[0] = 0; 5342 rstartsj[0] = 0; 5343 len = 0; /* total length of j or a array to be sent */ 5344 k = 0; 5345 for (i=0; i<nsends; i++) { 5346 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5347 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5348 for (j=0; j<nrows; j++) { 5349 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5350 for (l=0; l<sbs; l++) { 5351 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5352 5353 rowlen[j*sbs+l] = ncols; 5354 5355 len += ncols; 5356 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5357 } 5358 k++; 5359 } 5360 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5361 5362 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in 
bufj and bufa */ 5363 } 5364 /* recvs and sends of i-array are completed */ 5365 i = nrecvs; 5366 while (i--) { 5367 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5368 } 5369 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5370 5371 /* allocate buffers for sending j and a arrays */ 5372 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5373 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5374 5375 /* create i-array of B_oth */ 5376 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5377 5378 b_othi[0] = 0; 5379 len = 0; /* total length of j or a array to be received */ 5380 k = 0; 5381 for (i=0; i<nrecvs; i++) { 5382 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5383 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5384 for (j=0; j<nrows; j++) { 5385 b_othi[k+1] = b_othi[k] + rowlen[j]; 5386 len += rowlen[j]; k++; 5387 } 5388 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5389 } 5390 5391 /* allocate space for j and a arrays of B_oth */ 5392 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5393 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5394 5395 /* j-array */ 5396 /*---------*/ 5397 /* post receives of j-array */ 5398 for (i=0; i<nrecvs; i++) { 5399 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5400 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5401 } 5402 5403 /* pack the outgoing message j-array */ 5404 k = 0; 5405 for (i=0; i<nsends; i++) { 5406 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5407 bufJ = bufj+sstartsj[i]; 5408 for (j=0; j<nrows; j++) { 5409 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5410 for (ll=0; ll<sbs; ll++) { 5411 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5412 for (l=0; l<ncols; l++) { 5413 *bufJ++ = cols[l]; 5414 } 5415 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5416 } 5417 } 5418 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5419 } 5420 5421 /* recvs and sends of j-array are completed */ 5422 i = nrecvs; 5423 while (i--) { 5424 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5425 } 5426 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5427 } else if (scall == MAT_REUSE_MATRIX) { 5428 sstartsj = *startsj_s; 5429 rstartsj = *startsj_r; 5430 bufa = *bufa_ptr; 5431 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5432 b_otha = b_oth->a; 5433 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5434 5435 /* a-array */ 5436 /*---------*/ 5437 /* post receives of a-array */ 5438 for (i=0; i<nrecvs; i++) { 5439 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5440 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5441 } 5442 5443 /* pack the outgoing message a-array */ 5444 k = 0; 5445 for (i=0; i<nsends; i++) { 5446 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5447 bufA = bufa+sstartsj[i]; 5448 for (j=0; j<nrows; j++) { 5449 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5450 for (ll=0; ll<sbs; ll++) { 5451 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5452 for (l=0; l<ncols; l++) { 5453 *bufA++ = vals[l]; 5454 } 5455 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5456 } 5457 } 5458 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5459 } 5460 /* recvs and sends of a-array are completed */ 5461 i = nrecvs; 5462 while (i--) { 5463 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5464 } 5465 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5466 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5467 5468 if (scall == MAT_INITIAL_MATRIX) { 5469 /* put together the new matrix */ 5470 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5471 5472 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5473 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5474 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5475 b_oth->free_a = PETSC_TRUE; 5476 b_oth->free_ij = PETSC_TRUE; 5477 b_oth->nonew = 0; 5478 5479 ierr = PetscFree(bufj);CHKERRQ(ierr); 5480 if (!startsj_s || !bufa_ptr) { 5481 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5482 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5483 } else { 5484 *startsj_s = sstartsj; 5485 *startsj_r = rstartsj; 5486 *bufa_ptr = bufa; 5487 } 5488 } 5489 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5490 PetscFunctionReturn(0); 5491 } 5492 5493 #undef __FUNCT__ 5494 #define __FUNCT__ "MatGetCommunicationStructs" 5495 /*@C 5496 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5497 5498 Not Collective 5499 5500 Input Parameters: 5501 . A - The matrix in mpiaij format 5502 5503 Output Parameter: 5504 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5505 . 
colmap - A map from global column index to local index into lvec 5506 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5507 5508 Level: developer 5509 5510 @*/ 5511 #if defined(PETSC_USE_CTABLE) 5512 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5513 #else 5514 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5515 #endif 5516 { 5517 Mat_MPIAIJ *a; 5518 5519 PetscFunctionBegin; 5520 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5521 PetscValidPointer(lvec, 2); 5522 PetscValidPointer(colmap, 3); 5523 PetscValidPointer(multScatter, 4); 5524 a = (Mat_MPIAIJ*) A->data; 5525 if (lvec) *lvec = a->lvec; 5526 if (colmap) *colmap = a->colmap; 5527 if (multScatter) *multScatter = a->Mvctx; 5528 PetscFunctionReturn(0); 5529 } 5530 5531 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5532 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5533 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5534 5535 #undef __FUNCT__ 5536 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5537 /* 5538 Computes (B'*A')' since computing B*A directly is untenable 5539 5540 n p p 5541 ( ) ( ) ( ) 5542 m ( A ) * n ( B ) = m ( C ) 5543 ( ) ( ) ( ) 5544 5545 */ 5546 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5547 { 5548 PetscErrorCode ierr; 5549 Mat At,Bt,Ct; 5550 5551 PetscFunctionBegin; 5552 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5553 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5554 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5555 ierr = MatDestroy(&At);CHKERRQ(ierr); 5556 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5557 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5558 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5559 PetscFunctionReturn(0); 5560 } 5561 5562 #undef __FUNCT__ 5563 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5564 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5565 { 5566 PetscErrorCode ierr; 5567 PetscInt m=A->rmap->n,n=B->cmap->n; 5568 Mat Cmat; 5569 5570 PetscFunctionBegin; 5571 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5572 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5573 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5574 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5575 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5576 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5577 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5578 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5579 5580 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5581 5582 *C = Cmat; 5583 PetscFunctionReturn(0); 5584 } 5585 5586 /* ----------------------------------------------------------------*/ 5587 #undef __FUNCT__ 5588 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5589 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5590 { 5591 PetscErrorCode ierr; 5592 5593 PetscFunctionBegin; 5594 if (scall == MAT_INITIAL_MATRIX) { 5595 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5596 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5597 ierr = 
PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5598 } 5599 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5600 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5601 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5602 PetscFunctionReturn(0); 5603 } 5604 5605 #if defined(PETSC_HAVE_MUMPS) 5606 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5607 #endif 5608 #if defined(PETSC_HAVE_PASTIX) 5609 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5610 #endif 5611 #if defined(PETSC_HAVE_SUPERLU_DIST) 5612 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5613 #endif 5614 #if defined(PETSC_HAVE_CLIQUE) 5615 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5616 #endif 5617 5618 /*MC 5619 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5620 5621 Options Database Keys: 5622 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5623 5624 Level: beginner 5625 5626 .seealso: MatCreateAIJ() 5627 M*/ 5628 5629 #undef __FUNCT__ 5630 #define __FUNCT__ "MatCreate_MPIAIJ" 5631 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5632 { 5633 Mat_MPIAIJ *b; 5634 PetscErrorCode ierr; 5635 PetscMPIInt size; 5636 5637 PetscFunctionBegin; 5638 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5639 5640 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5641 B->data = (void*)b; 5642 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5643 B->assembled = PETSC_FALSE; 5644 B->insertmode = NOT_SET_VALUES; 5645 b->size = size; 5646 5647 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5648 5649 /* build cache for off array entries formed */ 5650 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5651 5652 b->donotstash = PETSC_FALSE; 5653 b->colmap = 0; 5654 b->garray = 0; 5655 b->roworiented = PETSC_TRUE; 5656 5657 /* stuff used for matrix vector multiply */ 5658 b->lvec = NULL; 5659 b->Mvctx = NULL; 5660 5661 /* stuff for MatGetRow() */ 5662 b->rowindices = 0; 5663 b->rowvalues = 0; 5664 b->getrowactive = PETSC_FALSE; 5665 5666 /* flexible pointer used in CUSP/CUSPARSE classes */ 5667 b->spptr = NULL; 5668 5669 #if defined(PETSC_HAVE_MUMPS) 5670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5671 #endif 5672 #if defined(PETSC_HAVE_PASTIX) 5673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5674 #endif 5675 #if defined(PETSC_HAVE_SUPERLU_DIST) 5676 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5677 #endif 5678 #if defined(PETSC_HAVE_CLIQUE) 5679 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5680 #endif 5681 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5682 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5683 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5684 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5685 
ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5686 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5687 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5688 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5689 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5690 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5691 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5692 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5693 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5694 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5695 PetscFunctionReturn(0); 5696 } 5697 5698 #undef __FUNCT__ 5699 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5700 /*@ 5701 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5702 and "off-diagonal" part of the matrix in CSR format. 5703 5704 Collective on MPI_Comm 5705 5706 Input Parameters: 5707 + comm - MPI communicator 5708 . m - number of local rows (Cannot be PETSC_DECIDE) 5709 . n - This value should be the same as the local size used in creating the 5710 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5711 calculated if N is given) For square matrices n is almost always m. 5712 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5713 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5714 . i - row indices for "diagonal" portion of matrix 5715 . j - column indices 5716 . a - matrix values 5717 . oi - row indices for "off-diagonal" portion of matrix 5718 . oj - column indices 5719 - oa - matrix values 5720 5721 Output Parameter: 5722 . mat - the matrix 5723 5724 Level: advanced 5725 5726 Notes: 5727 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5728 must free the arrays once the matrix has been destroyed and not before. 5729 5730 The i and j indices are 0 based 5731 5732 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5733 5734 This sets local rows and cannot be used to set off-processor values. 5735 5736 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5737 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5738 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5739 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5740 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5741 communication if it is known that only local entries will be set. 
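For illustration, a minimal sketch of the recommended MatSetValues()-based assembly (the preallocation arrays and the per-row ncols/cols/vals are placeholders for application data):
.vb
      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
      MatGetOwnershipRange(A,&rstart,&rend);
      for (i=rstart; i<rend; i++) {
        /* obtain ncols, cols[], vals[] for global row i from the application */
        MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);
      }
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve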
5742 5743 .keywords: matrix, aij, compressed row, sparse, parallel 5744 5745 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5746 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5747 @*/ 5748 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5749 { 5750 PetscErrorCode ierr; 5751 Mat_MPIAIJ *maij; 5752 5753 PetscFunctionBegin; 5754 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5755 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5756 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5757 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5758 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5759 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5760 maij = (Mat_MPIAIJ*) (*mat)->data; 5761 5762 (*mat)->preallocated = PETSC_TRUE; 5763 5764 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5765 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5766 5767 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5768 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5769 5770 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5771 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5772 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5773 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5774 5775 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5776 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5777 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5778 PetscFunctionReturn(0); 5779 } 5780 5781 /* 5782 Special version for direct calls from Fortran 5783 */ 5784 #include <petsc-private/fortranimpl.h> 5785 5786 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5787 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5788 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5789 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5790 #endif 5791 5792 /* Change these macros so they can be used in a void function */ 5793 #undef CHKERRQ 5794 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5795 #undef SETERRQ2 5796 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5797 #undef SETERRQ3 5798 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5799 #undef SETERRQ 5800 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5801 5802 #undef __FUNCT__ 5803 #define __FUNCT__ "matsetvaluesmpiaij_" 5804 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5805 { 5806 Mat mat = *mmat; 5807 PetscInt m = *mm, n = *mn; 5808 InsertMode addv = *maddv; 5809 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5810 PetscScalar value; 5811 PetscErrorCode ierr; 5812 5813 MatCheckPreallocated(mat,1); 5814 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5815 5816 #if defined(PETSC_USE_DEBUG) 5817 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5818 #endif 5819 { 5820 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5821 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5822 PetscBool roworiented = aij->roworiented; 5823 5824 /* Some Variables required in the macro */ 5825 Mat A = aij->A; 5826 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5827 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5828 MatScalar *aa = a->a; 5829 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5830 Mat B = aij->B; 5831 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5832 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5833 MatScalar *ba = b->a; 5834 5835 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5836 PetscInt nonew = a->nonew; 5837 MatScalar *ap1,*ap2; 5838 5839 PetscFunctionBegin; 5840 for (i=0; i<m; i++) { 5841 if (im[i] < 0) continue; 5842 #if defined(PETSC_USE_DEBUG) 5843 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5844 #endif 5845 if (im[i] >= rstart && im[i] < rend) { 5846 row = im[i] - rstart; 5847 lastcol1 = -1; 5848 rp1 = aj + ai[row]; 5849 ap1 = aa + ai[row]; 5850 rmax1 = aimax[row]; 5851 nrow1 = ailen[row]; 5852 low1 = 0; 5853 high1 = nrow1; 5854 lastcol2 = -1; 5855 rp2 = bj + bi[row]; 5856 ap2 = ba + bi[row]; 5857 rmax2 = bimax[row]; 5858 nrow2 = bilen[row]; 5859 low2 = 0; 5860 high2 = nrow2; 5861 5862 for (j=0; j<n; j++) { 5863 if (roworiented) value = v[i*n+j]; 5864 else value = v[i+j*m]; 5865 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5866 if (in[j] >= cstart && in[j] < cend) { 5867 col = in[j] - cstart; 5868 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5869 } else if (in[j] < 0) continue; 5870 #if defined(PETSC_USE_DEBUG) 5871 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5872 #endif 5873 else { 5874 if (mat->was_assembled) { 5875 if (!aij->colmap) { 5876 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5877 } 5878 #if defined(PETSC_USE_CTABLE) 5879 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5880 col--; 5881 #else 5882 col = aij->colmap[in[j]] - 1; 5883 #endif 5884 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5885 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5886 col = in[j]; 5887 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5888 B = aij->B; 5889 b = (Mat_SeqAIJ*)B->data; 5890 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5891 rp2 = bj + bi[row]; 5892 ap2 = ba + bi[row]; 5893 rmax2 = bimax[row]; 5894 nrow2 = bilen[row]; 5895 low2 = 0; 5896 high2 = nrow2; 5897 bm = aij->B->rmap->n; 5898 ba = b->a; 5899 } 5900 } else col = in[j]; 5901 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5902 } 5903 } 5904 } else if (!aij->donotstash) { 5905 if (roworiented) { 5906 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5907 } else { 5908 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5909 } 5910 } 5911 } 5912 } 5913 PetscFunctionReturnVoid(); 5914 } 5915 5916