#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
  automatically switches over to the inode routines when enough inodes exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij =
(Mat_MPIAIJ*)M->data; 111 PetscErrorCode ierr; 112 PetscInt i,rstart,nrows,*rows; 113 114 PetscFunctionBegin; 115 *zrows = NULL; 116 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 117 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 118 for (i=0; i<nrows; i++) rows[i] += rstart; 119 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 125 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 126 { 127 PetscErrorCode ierr; 128 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 129 PetscInt i,n,*garray = aij->garray; 130 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 131 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 132 PetscReal *work; 133 134 PetscFunctionBegin; 135 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 136 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 137 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 138 if (type == NORM_2) { 139 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 140 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 141 } 142 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 143 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 144 } 145 } else if (type == NORM_1) { 146 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 147 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 148 } 149 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 150 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 151 } 152 } else if (type == NORM_INFINITY) { 153 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 154 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 155 } 156 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 157 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 158 } 159 160 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 161 if (type == NORM_INFINITY) { 162 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 163 } else { 164 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 165 } 166 ierr = PetscFree(work);CHKERRQ(ierr); 167 if (type == NORM_2) { 168 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatDistribute_MPIAIJ" 175 /* 176 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 177 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
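
   A rough calling sketch (illustrative only; the setup of gmat, comm, rank and m is assumed,
   not taken from an existing example):

      Mat gmat = NULL,dmat;
      if (!rank) {
        /* build gmat as an assembled, square MATSEQAIJ on process 0 only */
      }
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
      /* later, with the same nonzero pattern, move over just the new numerical values */
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);

   where m is the number of rows this process is to own in the distributed matrix.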
178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 if (!rank) { 206 bses[0] = gmat->rmap->bs; 207 bses[1] = gmat->cmap->bs; 208 } 209 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 210 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 211 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 212 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 213 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 214 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 215 216 rowners[0] = 0; 217 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 218 rstart = rowners[rank]; 219 rend = rowners[rank+1]; 220 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 221 if (!rank) { 222 gmata = (Mat_SeqAIJ*) gmat->data; 223 /* send row lengths to all processors */ 224 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 225 for (i=1; i<size; i++) { 226 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 227 } 228 /* determine number diagonal and off-diagonal counts */ 229 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 230 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 231 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 232 jj = 0; 233 for (i=0; i<m; i++) { 234 for (j=0; j<dlens[i]; j++) { 235 if (gmata->j[jj] < rstart) ld[i]++; 236 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 237 jj++; 238 } 239 } 240 /* send column indices to other processes */ 241 for (i=1; i<size; i++) { 242 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 243 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 244 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 } 246 247 /* send numerical values to other processes */ 248 for (i=1; i<size; i++) { 249 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 250 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 251 } 252 gmataa = gmata->a; 253 gmataj = gmata->j; 254 255 } else { 256 /* receive row lengths */ 257 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 258 /* receive column indices */ 259 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 260 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 261 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 262 /* determine number diagonal and off-diagonal counts */ 263 ierr = 
PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 264 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 265 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 266 jj = 0; 267 for (i=0; i<m; i++) { 268 for (j=0; j<dlens[i]; j++) { 269 if (gmataj[jj] < rstart) ld[i]++; 270 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 271 jj++; 272 } 273 } 274 /* receive numerical values */ 275 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 276 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 277 } 278 /* set preallocation */ 279 for (i=0; i<m; i++) { 280 dlens[i] -= olens[i]; 281 } 282 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 283 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 284 285 for (i=0; i<m; i++) { 286 dlens[i] += olens[i]; 287 } 288 cnt = 0; 289 for (i=0; i<m; i++) { 290 row = rstart + i; 291 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 292 cnt += dlens[i]; 293 } 294 if (rank) { 295 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 296 } 297 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 298 ierr = PetscFree(rowners);CHKERRQ(ierr); 299 300 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 301 302 *inmat = mat; 303 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 304 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 305 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 306 mat = *inmat; 307 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 308 if (!rank) { 309 /* send numerical values to other processes */ 310 gmata = (Mat_SeqAIJ*) gmat->data; 311 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 312 gmataa = gmata->a; 313 for (i=1; i<size; i++) { 314 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 315 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 316 } 317 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 318 } else { 319 /* receive numerical values from process 0*/ 320 nz = Ad->nz + Ao->nz; 321 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 322 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 323 } 324 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 325 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 326 ad = Ad->a; 327 ao = Ao->a; 328 if (mat->rmap->n) { 329 i = 0; 330 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 331 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 332 } 333 for (i=1; i<mat->rmap->n; i++) { 334 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 335 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 336 } 337 i--; 338 if (mat->rmap->n) { 339 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 340 } 341 if (rank) { 342 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 343 } 344 } 345 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 346 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 /* 351 Local utility routine that creates a mapping from the global column 352 number to the 
local number in the off-diagonal part of the local 353 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 354 a slightly higher hash table cost; without it it is not scalable (each processor 355 has an order N integer array but is fast to acess. 356 */ 357 #undef __FUNCT__ 358 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 359 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 360 { 361 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 362 PetscErrorCode ierr; 363 PetscInt n = aij->B->cmap->n,i; 364 365 PetscFunctionBegin; 366 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 367 #if defined(PETSC_USE_CTABLE) 368 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 369 for (i=0; i<n; i++) { 370 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 371 } 372 #else 373 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 374 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 375 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 376 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 377 #endif 378 PetscFunctionReturn(0); 379 } 380 381 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 382 { \ 383 if (col <= lastcol1) low1 = 0; \ 384 else high1 = nrow1; \ 385 lastcol1 = col;\ 386 while (high1-low1 > 5) { \ 387 t = (low1+high1)/2; \ 388 if (rp1[t] > col) high1 = t; \ 389 else low1 = t; \ 390 } \ 391 for (_i=low1; _i<high1; _i++) { \ 392 if (rp1[_i] > col) break; \ 393 if (rp1[_i] == col) { \ 394 if (addv == ADD_VALUES) ap1[_i] += value; \ 395 else ap1[_i] = value; \ 396 goto a_noinsert; \ 397 } \ 398 } \ 399 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 400 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 401 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 402 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 403 N = nrow1++ - 1; a->nz++; high1++; \ 404 /* shift up all the later entries in this row */ \ 405 for (ii=N; ii>=_i; ii--) { \ 406 rp1[ii+1] = rp1[ii]; \ 407 ap1[ii+1] = ap1[ii]; \ 408 } \ 409 rp1[_i] = col; \ 410 ap1[_i] = value; \ 411 a_noinsert: ; \ 412 ailen[row] = nrow1; \ 413 } 414 415 416 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 417 { \ 418 if (col <= lastcol2) low2 = 0; \ 419 else high2 = nrow2; \ 420 lastcol2 = col; \ 421 while (high2-low2 > 5) { \ 422 t = (low2+high2)/2; \ 423 if (rp2[t] > col) high2 = t; \ 424 else low2 = t; \ 425 } \ 426 for (_i=low2; _i<high2; _i++) { \ 427 if (rp2[_i] > col) break; \ 428 if (rp2[_i] == col) { \ 429 if (addv == ADD_VALUES) ap2[_i] += value; \ 430 else ap2[_i] = value; \ 431 goto b_noinsert; \ 432 } \ 433 } \ 434 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 435 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 436 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 437 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 438 N = nrow2++ - 1; b->nz++; high2++; \ 439 /* shift up all the later entries in this row */ \ 440 for (ii=N; ii>=_i; ii--) { \ 441 rp2[ii+1] = rp2[ii]; \ 442 ap2[ii+1] = ap2[ii]; \ 443 } \ 444 rp2[_i] = col; \ 445 ap2[_i] = value; \ 446 
b_noinsert: ; \ 447 bilen[row] = nrow2; \ 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 452 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 453 { 454 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 455 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 456 PetscErrorCode ierr; 457 PetscInt l,*garray = mat->garray,diag; 458 459 PetscFunctionBegin; 460 /* code only works for square matrices A */ 461 462 /* find size of row to the left of the diagonal part */ 463 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 464 row = row - diag; 465 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 466 if (garray[b->j[b->i[row]+l]] > diag) break; 467 } 468 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 469 470 /* diagonal part */ 471 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 472 473 /* right of diagonal part */ 474 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "MatSetValues_MPIAIJ" 480 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 481 { 482 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 483 PetscScalar value; 484 PetscErrorCode ierr; 485 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 486 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 487 PetscBool roworiented = aij->roworiented; 488 489 /* Some Variables required in the macro */ 490 Mat A = aij->A; 491 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 492 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 493 MatScalar *aa = a->a; 494 PetscBool ignorezeroentries = a->ignorezeroentries; 495 Mat B = aij->B; 496 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 497 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 498 MatScalar *ba = b->a; 499 500 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 501 PetscInt nonew; 502 MatScalar *ap1,*ap2; 503 504 PetscFunctionBegin; 505 if (v) PetscValidScalarPointer(v,6); 506 for (i=0; i<m; i++) { 507 if (im[i] < 0) continue; 508 #if defined(PETSC_USE_DEBUG) 509 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 510 #endif 511 if (im[i] >= rstart && im[i] < rend) { 512 row = im[i] - rstart; 513 lastcol1 = -1; 514 rp1 = aj + ai[row]; 515 ap1 = aa + ai[row]; 516 rmax1 = aimax[row]; 517 nrow1 = ailen[row]; 518 low1 = 0; 519 high1 = nrow1; 520 lastcol2 = -1; 521 rp2 = bj + bi[row]; 522 ap2 = ba + bi[row]; 523 rmax2 = bimax[row]; 524 nrow2 = bilen[row]; 525 low2 = 0; 526 high2 = nrow2; 527 528 for (j=0; j<n; j++) { 529 if (v) { 530 if (roworiented) value = v[i*n+j]; 531 else value = v[i+j*m]; 532 } else value = 0.0; 533 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 534 if (in[j] >= cstart && in[j] < cend) { 535 col = in[j] - cstart; 536 nonew = a->nonew; 537 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 538 } else if (in[j] < 0) continue; 539 #if defined(PETSC_USE_DEBUG) 540 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 541 #endif 542 else { 543 if (mat->was_assembled) { 544 if (!aij->colmap) { 
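          /* build the map from global column numbers to local columns of the off-diagonal block B on first use */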
545 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 546 } 547 #if defined(PETSC_USE_CTABLE) 548 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 549 col--; 550 #else 551 col = aij->colmap[in[j]] - 1; 552 #endif 553 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 554 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 555 col = in[j]; 556 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 557 B = aij->B; 558 b = (Mat_SeqAIJ*)B->data; 559 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 560 rp2 = bj + bi[row]; 561 ap2 = ba + bi[row]; 562 rmax2 = bimax[row]; 563 nrow2 = bilen[row]; 564 low2 = 0; 565 high2 = nrow2; 566 bm = aij->B->rmap->n; 567 ba = b->a; 568 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 569 } else col = in[j]; 570 nonew = b->nonew; 571 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 572 } 573 } 574 } else { 575 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 576 if (!aij->donotstash) { 577 mat->assembled = PETSC_FALSE; 578 if (roworiented) { 579 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 580 } else { 581 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 582 } 583 } 584 } 585 } 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatGetValues_MPIAIJ" 591 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 592 { 593 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 594 PetscErrorCode ierr; 595 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 596 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 597 598 PetscFunctionBegin; 599 for (i=0; i<m; i++) { 600 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 601 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 602 if (idxm[i] >= rstart && idxm[i] < rend) { 603 row = idxm[i] - rstart; 604 for (j=0; j<n; j++) { 605 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 606 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 607 if (idxn[j] >= cstart && idxn[j] < cend) { 608 col = idxn[j] - cstart; 609 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 610 } else { 611 if (!aij->colmap) { 612 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 613 } 614 #if defined(PETSC_USE_CTABLE) 615 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 616 col--; 617 #else 618 col = aij->colmap[idxn[j]] - 1; 619 #endif 620 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 621 else { 622 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 623 } 624 } 625 } 626 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 627 } 628 PetscFunctionReturn(0); 629 } 630 631 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 632 633 #undef __FUNCT__ 634 #define __FUNCT__ 
"MatAssemblyBegin_MPIAIJ" 635 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 636 { 637 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 638 PetscErrorCode ierr; 639 PetscInt nstash,reallocs; 640 InsertMode addv; 641 642 PetscFunctionBegin; 643 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 644 645 /* make sure all processors are either in INSERTMODE or ADDMODE */ 646 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 647 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 648 mat->insertmode = addv; /* in case this processor had no cache */ 649 650 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 651 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 652 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 653 PetscFunctionReturn(0); 654 } 655 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 658 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 659 { 660 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 661 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 662 PetscErrorCode ierr; 663 PetscMPIInt n; 664 PetscInt i,j,rstart,ncols,flg; 665 PetscInt *row,*col; 666 PetscBool other_disassembled; 667 PetscScalar *val; 668 InsertMode addv = mat->insertmode; 669 670 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 671 672 PetscFunctionBegin; 673 if (!aij->donotstash && !mat->nooffprocentries) { 674 while (1) { 675 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 676 if (!flg) break; 677 678 for (i=0; i<n; ) { 679 /* Now identify the consecutive vals belonging to the same row */ 680 for (j=i,rstart=row[j]; j<n; j++) { 681 if (row[j] != rstart) break; 682 } 683 if (j < n) ncols = j-i; 684 else ncols = n-i; 685 /* Now assemble all these values with a single function call */ 686 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 687 688 i = j; 689 } 690 } 691 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 692 } 693 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 694 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 695 696 /* determine if any processor has disassembled, if so we must 697 also disassemble ourselfs, in order that we may reassemble. 
*/ 698 /* 699 if nonzero structure of submatrix B cannot change then we know that 700 no processor disassembled thus we can skip this stuff 701 */ 702 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 703 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 704 if (mat->was_assembled && !other_disassembled) { 705 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 706 } 707 } 708 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 709 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 710 } 711 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 712 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 713 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 714 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 715 716 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 717 718 aij->rowvalues = 0; 719 720 /* used by MatAXPY() */ 721 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 722 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 723 724 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 725 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 731 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 732 { 733 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 734 PetscErrorCode ierr; 735 736 PetscFunctionBegin; 737 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 738 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatZeroRows_MPIAIJ" 744 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 745 { 746 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 747 PetscErrorCode ierr; 748 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 749 PetscInt i,*owners = A->rmap->range; 750 PetscInt *nprocs,j,idx,nsends,row; 751 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 752 PetscInt *rvalues,count,base,slen,*source; 753 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 754 MPI_Comm comm; 755 MPI_Request *send_waits,*recv_waits; 756 MPI_Status recv_status,*send_status; 757 const PetscScalar *xx; 758 PetscScalar *bb; 759 #if defined(PETSC_DEBUG) 760 PetscBool found = PETSC_FALSE; 761 #endif 762 763 PetscFunctionBegin; 764 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 765 /* first count number of contributors to each processor */ 766 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 767 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 768 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 769 j = 0; 770 for (i=0; i<N; i++) { 771 if (lastidx > (idx = rows[i])) j = 0; 772 lastidx = idx; 773 for (; j<size; j++) { 774 if (idx >= owners[j] && idx < owners[j+1]) { 775 nprocs[2*j]++; 776 nprocs[2*j+1] = 1; 777 owner[i] = j; 778 #if defined(PETSC_DEBUG) 779 found = PETSC_TRUE; 780 #endif 781 break; 782 } 783 } 784 #if defined(PETSC_DEBUG) 785 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 786 found = PETSC_FALSE; 787 #endif 788 } 789 nsends = 0; 790 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 791 792 if (A->nooffproczerorows) { 793 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off 
process zero row"); 794 nrecvs = nsends; 795 nmax = N; 796 } else { 797 /* inform other processors of number of messages and max length*/ 798 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 799 } 800 801 /* post receives: */ 802 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 803 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 804 for (i=0; i<nrecvs; i++) { 805 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 806 } 807 808 /* do sends: 809 1) starts[i] gives the starting index in svalues for stuff going to 810 the ith processor 811 */ 812 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 813 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 814 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 815 816 starts[0] = 0; 817 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 818 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 819 820 starts[0] = 0; 821 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 822 count = 0; 823 for (i=0; i<size; i++) { 824 if (nprocs[2*i+1]) { 825 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 826 } 827 } 828 ierr = PetscFree(starts);CHKERRQ(ierr); 829 830 base = owners[rank]; 831 832 /* wait on receives */ 833 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 834 count = nrecvs; slen = 0; 835 while (count) { 836 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 837 /* unpack receives into our local space */ 838 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 839 840 source[imdex] = recv_status.MPI_SOURCE; 841 lens[imdex] = n; 842 slen += n; 843 count--; 844 } 845 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 846 847 /* move the data into the send scatter */ 848 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 849 count = 0; 850 for (i=0; i<nrecvs; i++) { 851 values = rvalues + i*nmax; 852 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 853 } 854 ierr = PetscFree(rvalues);CHKERRQ(ierr); 855 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 856 ierr = PetscFree(owner);CHKERRQ(ierr); 857 ierr = PetscFree(nprocs);CHKERRQ(ierr); 858 859 /* fix right hand side if needed */ 860 if (x && b) { 861 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 862 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 863 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 864 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 865 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 866 } 867 /* 868 Zero the required rows. If the "diagonal block" of the matrix 869 is square and the user wishes to set the diagonal we use separate 870 code so that MatSetValues() is not called for each diagonal allocating 871 new memory, thus calling lots of mallocs and slowing things down. 
872 873 */ 874 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 875 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 876 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 877 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 878 } else if (diag != 0.0) { 879 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 880 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 881 for (i = 0; i < slen; i++) { 882 row = lrows[i] + rstart; 883 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 884 } 885 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 886 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 887 } else { 888 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 889 } 890 ierr = PetscFree(lrows);CHKERRQ(ierr); 891 892 /* wait on sends */ 893 if (nsends) { 894 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 895 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 896 ierr = PetscFree(send_status);CHKERRQ(ierr); 897 } 898 ierr = PetscFree(send_waits);CHKERRQ(ierr); 899 ierr = PetscFree(svalues);CHKERRQ(ierr); 900 PetscFunctionReturn(0); 901 } 902 903 #undef __FUNCT__ 904 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 905 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 906 { 907 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 908 PetscErrorCode ierr; 909 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 910 PetscInt i,*owners = A->rmap->range; 911 PetscInt *nprocs,j,idx,nsends; 912 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 913 PetscInt *rvalues,count,base,slen,*source; 914 PetscInt *lens,*lrows,*values,m; 915 MPI_Comm comm; 916 MPI_Request *send_waits,*recv_waits; 917 MPI_Status recv_status,*send_status; 918 const PetscScalar *xx; 919 PetscScalar *bb,*mask; 920 Vec xmask,lmask; 921 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 922 const PetscInt *aj, *ii,*ridx; 923 PetscScalar *aa; 924 #if defined(PETSC_DEBUG) 925 PetscBool found = PETSC_FALSE; 926 #endif 927 928 PetscFunctionBegin; 929 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 930 /* first count number of contributors to each processor */ 931 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 932 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 933 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 934 j = 0; 935 for (i=0; i<N; i++) { 936 if (lastidx > (idx = rows[i])) j = 0; 937 lastidx = idx; 938 for (; j<size; j++) { 939 if (idx >= owners[j] && idx < owners[j+1]) { 940 nprocs[2*j]++; 941 nprocs[2*j+1] = 1; 942 owner[i] = j; 943 #if defined(PETSC_DEBUG) 944 found = PETSC_TRUE; 945 #endif 946 break; 947 } 948 } 949 #if defined(PETSC_DEBUG) 950 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 951 found = PETSC_FALSE; 952 #endif 953 } 954 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 955 956 /* inform other processors of number of messages and max length*/ 957 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 958 959 /* post receives: */ 960 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 961 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 962 for (i=0; i<nrecvs; i++) { 963 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 964 } 965 966 /* do sends: 967 1) starts[i] gives the starting index in svalues for stuff going to 968 the ith processor 969 */ 970 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 971 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 972 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 973 974 starts[0] = 0; 975 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 976 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 977 978 starts[0] = 0; 979 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 980 count = 0; 981 for (i=0; i<size; i++) { 982 if (nprocs[2*i+1]) { 983 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 984 } 985 } 986 ierr = PetscFree(starts);CHKERRQ(ierr); 987 988 base = owners[rank]; 989 990 /* wait on receives */ 991 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 992 count = nrecvs; slen = 0; 993 while (count) { 994 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 995 /* unpack receives into our local space */ 996 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 997 998 source[imdex] = recv_status.MPI_SOURCE; 999 lens[imdex] = n; 1000 slen += n; 1001 count--; 1002 } 1003 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1004 1005 /* move the data into the send scatter */ 1006 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1007 count = 0; 1008 for (i=0; i<nrecvs; i++) { 1009 values = rvalues + i*nmax; 1010 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1011 } 1012 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1013 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1014 ierr = PetscFree(owner);CHKERRQ(ierr); 1015 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1016 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1017 1018 /* zero diagonal part of matrix */ 1019 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1020 1021 /* handle off diagonal part of matrix */ 1022 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1023 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1024 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1025 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1026 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1027 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1028 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1030 if (x) { 1031 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1032 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1034 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1035 } 1036 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1037 1038 /* remove zeroed rows of off diagonal matrix */ 1039 ii = aij->i; 1040 for (i=0; i<slen; i++) { 1041 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1042 } 1043 1044 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1045 if (aij->compressedrow.use) { 1046 m = aij->compressedrow.nrows; 1047 ii = aij->compressedrow.i; 
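    /* compressed row storage keeps only the rows of B that contain nonzeros;
       rindex[] translates each stored row back to its local row number */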
1048 ridx = aij->compressedrow.rindex; 1049 for (i=0; i<m; i++) { 1050 n = ii[i+1] - ii[i]; 1051 aj = aij->j + ii[i]; 1052 aa = aij->a + ii[i]; 1053 1054 for (j=0; j<n; j++) { 1055 if (PetscAbsScalar(mask[*aj])) { 1056 if (b) bb[*ridx] -= *aa*xx[*aj]; 1057 *aa = 0.0; 1058 } 1059 aa++; 1060 aj++; 1061 } 1062 ridx++; 1063 } 1064 } else { /* do not use compressed row format */ 1065 m = l->B->rmap->n; 1066 for (i=0; i<m; i++) { 1067 n = ii[i+1] - ii[i]; 1068 aj = aij->j + ii[i]; 1069 aa = aij->a + ii[i]; 1070 for (j=0; j<n; j++) { 1071 if (PetscAbsScalar(mask[*aj])) { 1072 if (b) bb[i] -= *aa*xx[*aj]; 1073 *aa = 0.0; 1074 } 1075 aa++; 1076 aj++; 1077 } 1078 } 1079 } 1080 if (x) { 1081 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1082 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1083 } 1084 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1085 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1086 ierr = PetscFree(lrows);CHKERRQ(ierr); 1087 1088 /* wait on sends */ 1089 if (nsends) { 1090 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1091 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1092 ierr = PetscFree(send_status);CHKERRQ(ierr); 1093 } 1094 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1095 ierr = PetscFree(svalues);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatMult_MPIAIJ" 1101 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1102 { 1103 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1104 PetscErrorCode ierr; 1105 PetscInt nt; 1106 1107 PetscFunctionBegin; 1108 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1109 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1110 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1111 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1112 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1113 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1119 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1120 { 1121 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1122 PetscErrorCode ierr; 1123 1124 PetscFunctionBegin; 1125 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1126 PetscFunctionReturn(0); 1127 } 1128 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1131 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1132 { 1133 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1134 PetscErrorCode ierr; 1135 1136 PetscFunctionBegin; 1137 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1138 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1139 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNCT__ 1145 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1146 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1147 { 1148 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1149 PetscErrorCode ierr; 1150 PetscBool merged; 1151 1152 PetscFunctionBegin; 1153 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1154 /* do nondiagonal part */ 1155 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1156 if (!merged) { 1157 /* send it on its way */ 1158 
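    /* the reverse scatter that adds the off-diagonal contributions (a->lvec) into yy is
       started here and completed only after the local multiply below, so the
       communication overlaps the local computation */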
ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1159 /* do local part */ 1160 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1161 /* receive remote parts: note this assumes the values are not actually */ 1162 /* added in yy until the next line, */ 1163 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1164 } else { 1165 /* do local part */ 1166 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1167 /* send it on its way */ 1168 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1169 /* values actually were received in the Begin() but we need to call this nop */ 1170 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1171 } 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1177 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1178 { 1179 MPI_Comm comm; 1180 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1181 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1182 IS Me,Notme; 1183 PetscErrorCode ierr; 1184 PetscInt M,N,first,last,*notme,i; 1185 PetscMPIInt size; 1186 1187 PetscFunctionBegin; 1188 /* Easy test: symmetric diagonal block */ 1189 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1190 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1191 if (!*f) PetscFunctionReturn(0); 1192 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1193 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1194 if (size == 1) PetscFunctionReturn(0); 1195 1196 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1197 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1198 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1199 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1200 for (i=0; i<first; i++) notme[i] = i; 1201 for (i=last; i<M; i++) notme[i-last+first] = i; 1202 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1203 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1204 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1205 Aoff = Aoffs[0]; 1206 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1207 Boff = Boffs[0]; 1208 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1209 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1210 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1211 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1212 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1213 ierr = PetscFree(notme);CHKERRQ(ierr); 1214 PetscFunctionReturn(0); 1215 } 1216 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1219 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1220 { 1221 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1222 PetscErrorCode ierr; 1223 1224 PetscFunctionBegin; 1225 /* do nondiagonal part */ 1226 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1227 /* send it on its way */ 1228 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1229 /* do local part */ 1230 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1231 /* receive remote parts */ 1232 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 /* 1237 This only works correctly for square matrices where 
the subblock A->A is the 1238 diagonal block 1239 */ 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1242 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1243 { 1244 PetscErrorCode ierr; 1245 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1246 1247 PetscFunctionBegin; 1248 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1249 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1250 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatScale_MPIAIJ" 1256 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1257 { 1258 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1259 PetscErrorCode ierr; 1260 1261 PetscFunctionBegin; 1262 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1263 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatDestroy_MPIAIJ" 1269 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1270 { 1271 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1272 PetscErrorCode ierr; 1273 1274 PetscFunctionBegin; 1275 #if defined(PETSC_USE_LOG) 1276 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1277 #endif 1278 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1279 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1280 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1281 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1282 #if defined(PETSC_USE_CTABLE) 1283 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1284 #else 1285 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1286 #endif 1287 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1288 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1289 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1290 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1291 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1292 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1293 1294 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",NULL);CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1308 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1309 { 1310 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1311 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1312 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1313 PetscErrorCode ierr; 1314 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1315 int fd; 1316 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1317 PetscInt nzmax,*column_indices,j,k,col,*garray 
= aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1318 PetscScalar *column_values; 1319 PetscInt message_count,flowcontrolcount; 1320 FILE *file; 1321 1322 PetscFunctionBegin; 1323 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1324 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1325 nz = A->nz + B->nz; 1326 if (!rank) { 1327 header[0] = MAT_FILE_CLASSID; 1328 header[1] = mat->rmap->N; 1329 header[2] = mat->cmap->N; 1330 1331 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1332 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1333 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1334 /* get largest number of rows any processor has */ 1335 rlen = mat->rmap->n; 1336 range = mat->rmap->range; 1337 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1338 } else { 1339 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1340 rlen = mat->rmap->n; 1341 } 1342 1343 /* load up the local row counts */ 1344 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1345 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1346 1347 /* store the row lengths to the file */ 1348 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1349 if (!rank) { 1350 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1351 for (i=1; i<size; i++) { 1352 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1353 rlen = range[i+1] - range[i]; 1354 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1355 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1356 } 1357 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1358 } else { 1359 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1360 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1361 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1362 } 1363 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1364 1365 /* load up the local column indices */ 1366 nzmax = nz; /* th processor needs space a largest processor needs */ 1367 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1368 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1369 cnt = 0; 1370 for (i=0; i<mat->rmap->n; i++) { 1371 for (j=B->i[i]; j<B->i[i+1]; j++) { 1372 if ((col = garray[B->j[j]]) > cstart) break; 1373 column_indices[cnt++] = col; 1374 } 1375 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1376 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1377 } 1378 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1379 1380 /* store the column indices to the file */ 1381 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1382 if (!rank) { 1383 MPI_Status status; 1384 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1385 for (i=1; i<size; i++) { 1386 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 
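      /* first learn how many nonzeros process i will send, then receive its column indices */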
1387 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1388 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1389 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1390 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1391 } 1392 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1393 } else { 1394 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1395 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1396 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1397 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1398 } 1399 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1400 1401 /* load up the local column values */ 1402 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1403 cnt = 0; 1404 for (i=0; i<mat->rmap->n; i++) { 1405 for (j=B->i[i]; j<B->i[i+1]; j++) { 1406 if (garray[B->j[j]] > cstart) break; 1407 column_values[cnt++] = B->a[j]; 1408 } 1409 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1410 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1411 } 1412 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1413 1414 /* store the column values to the file */ 1415 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1416 if (!rank) { 1417 MPI_Status status; 1418 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1419 for (i=1; i<size; i++) { 1420 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1421 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1422 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1423 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1424 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1425 } 1426 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1427 } else { 1428 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1429 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1430 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1431 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1432 } 1433 ierr = PetscFree(column_values);CHKERRQ(ierr); 1434 1435 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1436 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #include <petscdraw.h> 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1443 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1444 { 1445 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1446 PetscErrorCode ierr; 1447 PetscMPIInt rank = aij->rank,size = aij->size; 1448 PetscBool isdraw,iascii,isbinary; 1449 PetscViewer sviewer; 1450 PetscViewerFormat format; 1451 1452 PetscFunctionBegin; 1453 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1456 if (iascii) { 1457 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1458 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1459 MatInfo info; 1460 PetscBool inodes; 1461 1462 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1463 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1464 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1465 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1466 if (!inodes) { 1467 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1468 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1469 } else { 1470 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1471 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1472 } 1473 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1474 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1475 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1476 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1477 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1478 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1480 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1481 PetscFunctionReturn(0); 1482 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1483 PetscInt inodecount,inodelimit,*inodes; 1484 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1485 if (inodes) { 1486 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1487 } else { 1488 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1489 } 1490 PetscFunctionReturn(0); 1491 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1492 PetscFunctionReturn(0); 1493 } 1494 } else if (isbinary) { 1495 if (size == 1) { 1496 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1497 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1498 } else { 1499 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1500 } 1501 PetscFunctionReturn(0); 1502 } else if (isdraw) { 1503 PetscDraw draw; 1504 PetscBool isnull; 1505 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1506 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1507 } 1508 1509 if (size == 1) { 1510 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1511 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1512 } else { 1513 /* assemble the entire matrix onto first processor. 
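         Every rank takes part in building the temporary matrix: all of its rows are placed on
         rank 0, which views the gathered copy through a singleton viewer before the temporary
         matrix is destroyed.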
*/ 1514 Mat A; 1515 Mat_SeqAIJ *Aloc; 1516 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1517 MatScalar *a; 1518 1519 if (mat->rmap->N > 1024) { 1520 PetscBool flg = PETSC_FALSE; 1521 1522 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1523 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1524 } 1525 1526 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1527 if (!rank) { 1528 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1529 } else { 1530 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1531 } 1532 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1533 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1534 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1535 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1536 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1537 1538 /* copy over the A part */ 1539 Aloc = (Mat_SeqAIJ*)aij->A->data; 1540 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1541 row = mat->rmap->rstart; 1542 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1543 for (i=0; i<m; i++) { 1544 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1545 row++; 1546 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1547 } 1548 aj = Aloc->j; 1549 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1550 1551 /* copy over the B part */ 1552 Aloc = (Mat_SeqAIJ*)aij->B->data; 1553 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1554 row = mat->rmap->rstart; 1555 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1556 ct = cols; 1557 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1558 for (i=0; i<m; i++) { 1559 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1560 row++; 1561 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1562 } 1563 ierr = PetscFree(ct);CHKERRQ(ierr); 1564 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1565 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 /* 1567 Everyone has to call to draw the matrix since the graphics waits are 1568 synchronized across all processors that share the PetscDraw object 1569 */ 1570 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1571 if (!rank) { 1572 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1573 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1574 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1575 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1576 } 1577 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1578 ierr = MatDestroy(&A);CHKERRQ(ierr); 1579 } 1580 PetscFunctionReturn(0); 1581 } 1582 1583 #undef __FUNCT__ 1584 #define __FUNCT__ "MatView_MPIAIJ" 1585 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1586 { 1587 PetscErrorCode ierr; 1588 PetscBool iascii,isdraw,issocket,isbinary; 1589 1590 PetscFunctionBegin; 1591 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1592 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1595 if (iascii || isdraw || isbinary || issocket) { 1596 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1597 } 1598 PetscFunctionReturn(0); 1599 } 1600 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatSOR_MPIAIJ" 1603 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1604 { 1605 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1606 PetscErrorCode ierr; 1607 Vec bb1 = 0; 1608 PetscBool hasop; 1609 1610 PetscFunctionBegin; 1611 if (flag == SOR_APPLY_UPPER) { 1612 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1613 PetscFunctionReturn(0); 1614 } 1615 1616 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1617 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1618 } 1619 1620 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1621 if (flag & SOR_ZERO_INITIAL_GUESS) { 1622 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1623 its--; 1624 } 1625 1626 while (its--) { 1627 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1628 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 1630 /* update rhs: bb1 = bb - B*x */ 1631 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1632 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1633 1634 /* local sweep */ 1635 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1636 } 1637 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1638 if (flag & SOR_ZERO_INITIAL_GUESS) { 1639 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1640 its--; 1641 } 1642 while (its--) { 1643 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1644 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 1646 /* update rhs: bb1 = bb - B*x */ 1647 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1648 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1649 1650 /* local sweep */ 1651 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1652 } 1653 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1654 if (flag & SOR_ZERO_INITIAL_GUESS) { 1655 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1656 its--; 1657 } 1658 while (its--) { 1659 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1660 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 1662 /* update rhs: bb1 = bb - B*x */ 1663 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1664 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1665 1666 /* local sweep */ 1667 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1668 } 1669 } else if (flag & SOR_EISENSTAT) { 1670 Vec xx1; 1671 1672 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1673 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1674 
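    /* Eisenstat's trick: the backward sweep above produced a provisional xx; now gather its
       off-process entries into lvec, form bb1 = ((omega-2)/omega)*D*xx + bb + B*lvec, run a
       forward sweep with zero initial guess into xx1, and accumulate the correction xx += xx1. */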
1675 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1676 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 if (!mat->diag) { 1678 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1679 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1680 } 1681 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1682 if (hasop) { 1683 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1684 } else { 1685 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1686 } 1687 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1688 1689 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1690 1691 /* local sweep */ 1692 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1693 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1694 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1695 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1696 1697 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1698 PetscFunctionReturn(0); 1699 } 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "MatPermute_MPIAIJ" 1703 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1704 { 1705 Mat aA,aB,Aperm; 1706 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1707 PetscScalar *aa,*ba; 1708 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1709 PetscSF rowsf,sf; 1710 IS parcolp = NULL; 1711 PetscBool done; 1712 PetscErrorCode ierr; 1713 1714 PetscFunctionBegin; 1715 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1716 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1717 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1718 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1719 1720 /* Invert row permutation to find out where my rows should go */ 1721 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1722 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1723 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1724 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1725 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1726 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1727 1728 /* Invert column permutation to find out where my columns should go */ 1729 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1730 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1731 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1732 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1733 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1734 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1736 1737 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1738 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1739 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1740 1741 /* Find out where my gcols should go */ 1742 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1743 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1744 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1745 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1746 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1747 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1748 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1750 1751 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1752 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1753 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1756 for (i=0; i<m; i++) { 1757 PetscInt row = rdest[i],rowner; 1758 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1759 for (j=ai[i]; j<ai[i+1]; j++) { 1760 PetscInt cowner,col = cdest[aj[j]]; 1761 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1762 if (rowner == cowner) dnnz[i]++; 1763 else onnz[i]++; 1764 } 1765 for (j=bi[i]; j<bi[i+1]; j++) { 1766 PetscInt cowner,col = gcdest[bj[j]]; 1767 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1768 if (rowner == cowner) dnnz[i]++; 1769 else onnz[i]++; 1770 } 1771 } 1772 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1773 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1775 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1779 1780 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1781 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1783 for (i=0; i<m; i++) { 1784 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1785 PetscInt rowlen; 1786 rowlen = ai[i+1] - ai[i]; 1787 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1788 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1789 rowlen = bi[i+1] - bi[i]; 1790 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1791 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1792 } 1793 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1794 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1797 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1799 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1800 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1801 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1802 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1803 *B = Aperm; 1804 PetscFunctionReturn(0); 1805 } 1806 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1809 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1810 { 1811 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1812 Mat A = mat->A,B = mat->B; 1813 PetscErrorCode ierr; 1814 PetscReal isend[5],irecv[5]; 1815 1816 PetscFunctionBegin; 1817 
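  /* Gather the counters from the diagonal (A) and off-diagonal (B) blocks, then either report
     the local totals or reduce them (max or sum) over the matrix's communicator, depending on flag. */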
info->block_size = 1.0; 1818 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1819 1820 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1821 isend[3] = info->memory; isend[4] = info->mallocs; 1822 1823 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1824 1825 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1826 isend[3] += info->memory; isend[4] += info->mallocs; 1827 if (flag == MAT_LOCAL) { 1828 info->nz_used = isend[0]; 1829 info->nz_allocated = isend[1]; 1830 info->nz_unneeded = isend[2]; 1831 info->memory = isend[3]; 1832 info->mallocs = isend[4]; 1833 } else if (flag == MAT_GLOBAL_MAX) { 1834 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1835 1836 info->nz_used = irecv[0]; 1837 info->nz_allocated = irecv[1]; 1838 info->nz_unneeded = irecv[2]; 1839 info->memory = irecv[3]; 1840 info->mallocs = irecv[4]; 1841 } else if (flag == MAT_GLOBAL_SUM) { 1842 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1843 1844 info->nz_used = irecv[0]; 1845 info->nz_allocated = irecv[1]; 1846 info->nz_unneeded = irecv[2]; 1847 info->memory = irecv[3]; 1848 info->mallocs = irecv[4]; 1849 } 1850 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1851 info->fill_ratio_needed = 0; 1852 info->factor_mallocs = 0; 1853 PetscFunctionReturn(0); 1854 } 1855 1856 #undef __FUNCT__ 1857 #define __FUNCT__ "MatSetOption_MPIAIJ" 1858 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1859 { 1860 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1861 PetscErrorCode ierr; 1862 1863 PetscFunctionBegin; 1864 switch (op) { 1865 case MAT_NEW_NONZERO_LOCATIONS: 1866 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1867 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1868 case MAT_KEEP_NONZERO_PATTERN: 1869 case MAT_NEW_NONZERO_LOCATION_ERR: 1870 case MAT_USE_INODES: 1871 case MAT_IGNORE_ZERO_ENTRIES: 1872 MatCheckPreallocated(A,1); 1873 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1874 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1875 break; 1876 case MAT_ROW_ORIENTED: 1877 a->roworiented = flg; 1878 1879 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1880 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1881 break; 1882 case MAT_NEW_DIAGONALS: 1883 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1884 break; 1885 case MAT_IGNORE_OFF_PROC_ENTRIES: 1886 a->donotstash = flg; 1887 break; 1888 case MAT_SPD: 1889 A->spd_set = PETSC_TRUE; 1890 A->spd = flg; 1891 if (flg) { 1892 A->symmetric = PETSC_TRUE; 1893 A->structurally_symmetric = PETSC_TRUE; 1894 A->symmetric_set = PETSC_TRUE; 1895 A->structurally_symmetric_set = PETSC_TRUE; 1896 } 1897 break; 1898 case MAT_SYMMETRIC: 1899 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1900 break; 1901 case MAT_STRUCTURALLY_SYMMETRIC: 1902 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1903 break; 1904 case MAT_HERMITIAN: 1905 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1906 break; 1907 case MAT_SYMMETRY_ETERNAL: 1908 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1909 break; 1910 default: 1911 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1912 } 1913 PetscFunctionReturn(0); 1914 } 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatGetRow_MPIAIJ" 1918 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1919 { 1920 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1921 PetscScalar 
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1922 PetscErrorCode ierr; 1923 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1924 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1925 PetscInt *cmap,*idx_p; 1926 1927 PetscFunctionBegin; 1928 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1929 mat->getrowactive = PETSC_TRUE; 1930 1931 if (!mat->rowvalues && (idx || v)) { 1932 /* 1933 allocate enough space to hold information from the longest row. 1934 */ 1935 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1936 PetscInt max = 1,tmp; 1937 for (i=0; i<matin->rmap->n; i++) { 1938 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1939 if (max < tmp) max = tmp; 1940 } 1941 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1942 } 1943 1944 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1945 lrow = row - rstart; 1946 1947 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1948 if (!v) {pvA = 0; pvB = 0;} 1949 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1950 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1951 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1952 nztot = nzA + nzB; 1953 1954 cmap = mat->garray; 1955 if (v || idx) { 1956 if (nztot) { 1957 /* Sort by increasing column numbers, assuming A and B already sorted */ 1958 PetscInt imark = -1; 1959 if (v) { 1960 *v = v_p = mat->rowvalues; 1961 for (i=0; i<nzB; i++) { 1962 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1963 else break; 1964 } 1965 imark = i; 1966 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1967 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1968 } 1969 if (idx) { 1970 *idx = idx_p = mat->rowindices; 1971 if (imark > -1) { 1972 for (i=0; i<imark; i++) { 1973 idx_p[i] = cmap[cworkB[i]]; 1974 } 1975 } else { 1976 for (i=0; i<nzB; i++) { 1977 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1978 else break; 1979 } 1980 imark = i; 1981 } 1982 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1983 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1984 } 1985 } else { 1986 if (idx) *idx = 0; 1987 if (v) *v = 0; 1988 } 1989 } 1990 *nz = nztot; 1991 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1992 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1993 PetscFunctionReturn(0); 1994 } 1995 1996 #undef __FUNCT__ 1997 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1998 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1999 { 2000 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2001 2002 PetscFunctionBegin; 2003 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2004 aij->getrowactive = PETSC_FALSE; 2005 PetscFunctionReturn(0); 2006 } 2007 2008 #undef __FUNCT__ 2009 #define __FUNCT__ "MatNorm_MPIAIJ" 2010 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2011 { 2012 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2013 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2014 PetscErrorCode ierr; 2015 PetscInt i,j,cstart = mat->cmap->rstart; 2016 PetscReal sum = 0.0; 2017 MatScalar *v; 2018 2019 PetscFunctionBegin; 2020 if (aij->size == 1) { 2021 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2022 } else { 2023 if (type == NORM_FROBENIUS) { 2024 v = 
amat->a; 2025 for (i=0; i<amat->nz; i++) { 2026 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2027 } 2028 v = bmat->a; 2029 for (i=0; i<bmat->nz; i++) { 2030 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2031 } 2032 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2033 *norm = PetscSqrtReal(*norm); 2034 } else if (type == NORM_1) { /* max column norm */ 2035 PetscReal *tmp,*tmp2; 2036 PetscInt *jj,*garray = aij->garray; 2037 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2039 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2040 *norm = 0.0; 2041 v = amat->a; jj = amat->j; 2042 for (j=0; j<amat->nz; j++) { 2043 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2044 } 2045 v = bmat->a; jj = bmat->j; 2046 for (j=0; j<bmat->nz; j++) { 2047 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2048 } 2049 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2050 for (j=0; j<mat->cmap->N; j++) { 2051 if (tmp2[j] > *norm) *norm = tmp2[j]; 2052 } 2053 ierr = PetscFree(tmp);CHKERRQ(ierr); 2054 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2055 } else if (type == NORM_INFINITY) { /* max row norm */ 2056 PetscReal ntemp = 0.0; 2057 for (j=0; j<aij->A->rmap->n; j++) { 2058 v = amat->a + amat->i[j]; 2059 sum = 0.0; 2060 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2061 sum += PetscAbsScalar(*v); v++; 2062 } 2063 v = bmat->a + bmat->i[j]; 2064 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2065 sum += PetscAbsScalar(*v); v++; 2066 } 2067 if (sum > ntemp) ntemp = sum; 2068 } 2069 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2070 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2071 } 2072 PetscFunctionReturn(0); 2073 } 2074 2075 #undef __FUNCT__ 2076 #define __FUNCT__ "MatTranspose_MPIAIJ" 2077 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2078 { 2079 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2080 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2081 PetscErrorCode ierr; 2082 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2083 PetscInt cstart = A->cmap->rstart,ncol; 2084 Mat B; 2085 MatScalar *array; 2086 2087 PetscFunctionBegin; 2088 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2089 2090 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2091 ai = Aloc->i; aj = Aloc->j; 2092 bi = Bloc->i; bj = Bloc->j; 2093 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2094 PetscInt *d_nnz,*g_nnz,*o_nnz; 2095 PetscSFNode *oloc; 2096 PETSC_UNUSED PetscSF sf; 2097 2098 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2099 /* compute d_nnz for preallocation */ 2100 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2101 for (i=0; i<ai[ma]; i++) { 2102 d_nnz[aj[i]]++; 2103 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2104 } 2105 /* compute local off-diagonal contributions */ 2106 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2107 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2108 /* map those to global */ 2109 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2110 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2111 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2112 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2113 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2114 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2116 2117 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2118 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2119 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2120 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2121 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2122 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2123 } else { 2124 B = *matout; 2125 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2126 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2127 } 2128 2129 /* copy over the A part */ 2130 array = Aloc->a; 2131 row = A->rmap->rstart; 2132 for (i=0; i<ma; i++) { 2133 ncol = ai[i+1]-ai[i]; 2134 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2135 row++; 2136 array += ncol; aj += ncol; 2137 } 2138 aj = Aloc->j; 2139 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2140 2141 /* copy over the B part */ 2142 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2143 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2144 array = Bloc->a; 2145 row = A->rmap->rstart; 2146 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2147 cols_tmp = cols; 2148 for (i=0; i<mb; i++) { 2149 ncol = bi[i+1]-bi[i]; 2150 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2151 row++; 2152 array += ncol; cols_tmp += ncol; 2153 } 2154 ierr = PetscFree(cols);CHKERRQ(ierr); 2155 2156 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2157 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2159 *matout = B; 2160 } else { 2161 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2162 } 2163 PetscFunctionReturn(0); 2164 } 2165 2166 #undef __FUNCT__ 2167 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2168 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2169 { 2170 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2171 Mat a = aij->A,b = aij->B; 2172 PetscErrorCode ierr; 2173 PetscInt s1,s2,s3; 2174 2175 PetscFunctionBegin; 2176 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2177 if (rr) { 2178 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2179 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2180 /* Overlap communication with computation. 
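         The scatter of rr into lvec is started here and completed only after the left scaling of
         the off-diagonal block and the scaling of the diagonal block, so the communication
         overlaps that local work.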
*/ 2181 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2182 } 2183 if (ll) { 2184 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2185 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2186 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2187 } 2188 /* scale the diagonal block */ 2189 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2190 2191 if (rr) { 2192 /* Do a scatter end and then right scale the off-diagonal block */ 2193 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2194 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2195 } 2196 PetscFunctionReturn(0); 2197 } 2198 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2201 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2202 { 2203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2204 PetscErrorCode ierr; 2205 2206 PetscFunctionBegin; 2207 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2208 PetscFunctionReturn(0); 2209 } 2210 2211 #undef __FUNCT__ 2212 #define __FUNCT__ "MatEqual_MPIAIJ" 2213 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2214 { 2215 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2216 Mat a,b,c,d; 2217 PetscBool flg; 2218 PetscErrorCode ierr; 2219 2220 PetscFunctionBegin; 2221 a = matA->A; b = matA->B; 2222 c = matB->A; d = matB->B; 2223 2224 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2225 if (flg) { 2226 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2227 } 2228 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2229 PetscFunctionReturn(0); 2230 } 2231 2232 #undef __FUNCT__ 2233 #define __FUNCT__ "MatCopy_MPIAIJ" 2234 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2235 { 2236 PetscErrorCode ierr; 2237 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2238 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2239 2240 PetscFunctionBegin; 2241 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2242 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2243 /* because of the column compression in the off-processor part of the matrix a->B, 2244 the number of columns in a->B and b->B may be different, hence we cannot call 2245 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2246 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2247 then copying the submatrices */ 2248 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2249 } else { 2250 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2251 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2252 } 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatSetUp_MPIAIJ" 2258 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2259 { 2260 PetscErrorCode ierr; 2261 2262 PetscFunctionBegin; 2263 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2264 PetscFunctionReturn(0); 2265 } 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2269 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2270 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2271 { 2272 PetscInt i,m=Y->rmap->N; 2273 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2274 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2275 const PetscInt *xi = x->i,*yi = y->i; 2276 2277 PetscFunctionBegin; 2278 /* Set the number of nonzeros in the new matrix */ 2279 for (i=0; i<m; i++) { 2280 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2281 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2282 nnz[i] = 0; 2283 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2284 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2285 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2286 nnz[i]++; 2287 } 2288 for (; k<nzy; k++) nnz[i]++; 2289 } 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "MatAXPY_MPIAIJ" 2295 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2296 { 2297 PetscErrorCode ierr; 2298 PetscInt i; 2299 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2300 PetscBLASInt bnz,one=1; 2301 Mat_SeqAIJ *x,*y; 2302 2303 PetscFunctionBegin; 2304 if (str == SAME_NONZERO_PATTERN) { 2305 PetscScalar alpha = a; 2306 x = (Mat_SeqAIJ*)xx->A->data; 2307 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2308 y = (Mat_SeqAIJ*)yy->A->data; 2309 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2310 x = (Mat_SeqAIJ*)xx->B->data; 2311 y = (Mat_SeqAIJ*)yy->B->data; 2312 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2313 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2314 } else if (str == SUBSET_NONZERO_PATTERN) { 2315 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2316 2317 x = (Mat_SeqAIJ*)xx->B->data; 2318 y = (Mat_SeqAIJ*)yy->B->data; 2319 if (y->xtoy && y->XtoY != xx->B) { 2320 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2321 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2322 } 2323 if (!y->xtoy) { /* get xtoy */ 2324 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2325 y->XtoY = xx->B; 2326 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2327 } 2328 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2329 } else { 2330 Mat B; 2331 PetscInt *nnz_d,*nnz_o; 2332 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2333 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2334 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2335 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2336 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2337 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2338 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2339 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2341 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2342 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2343 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2344 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2346 } 2347 PetscFunctionReturn(0); 2348 } 2349 2350 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2351 2352 #undef __FUNCT__ 2353 #define __FUNCT__ "MatConjugate_MPIAIJ" 2354 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2355 { 2356 #if defined(PETSC_USE_COMPLEX) 2357 PetscErrorCode ierr; 2358 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2359 2360 PetscFunctionBegin; 2361 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2362 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2363 #else 2364 PetscFunctionBegin; 2365 #endif 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatRealPart_MPIAIJ" 2371 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2372 { 2373 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2374 PetscErrorCode ierr; 2375 2376 PetscFunctionBegin; 2377 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2378 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2379 PetscFunctionReturn(0); 2380 } 2381 2382 #undef __FUNCT__ 2383 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2384 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2385 { 2386 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2387 PetscErrorCode ierr; 2388 2389 PetscFunctionBegin; 2390 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2391 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2392 PetscFunctionReturn(0); 2393 } 2394 2395 #if defined(PETSC_HAVE_PBGL) 2396 2397 #include <boost/parallel/mpi/bsp_process_group.hpp> 2398 #include <boost/graph/distributed/ilu_default_graph.hpp> 2399 #include <boost/graph/distributed/ilu_0_block.hpp> 2400 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2401 #include <boost/graph/distributed/petsc/interface.hpp> 2402 #include <boost/multi_array.hpp> 2403 #include <boost/parallel/distributed_property_map->hpp> 2404 2405 #undef __FUNCT__ 2406 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2407 /* 2408 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2409 */ 2410 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2411 { 2412 namespace petsc = boost::distributed::petsc; 2413 2414 namespace graph_dist = boost::graph::distributed; 2415 using boost::graph::distributed::ilu_default::process_group_type; 2416 using boost::graph::ilu_permuted; 2417 2418 PetscBool row_identity, col_identity; 2419 PetscContainer c; 2420 PetscInt m, n, M, N; 2421 PetscErrorCode ierr; 2422 2423 PetscFunctionBegin; 2424 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2425 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2426 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2427 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2428 2429 process_group_type pg; 2430 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2431 lgraph_type 
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2432 lgraph_type& level_graph = *lgraph_p; 2433 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2434 2435 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2436 ilu_permuted(level_graph); 2437 2438 /* put together the new matrix */ 2439 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2440 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2441 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2442 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2443 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2444 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2445 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2446 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 2448 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2449 ierr = PetscContainerSetPointer(c, lgraph_p); 2450 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2451 ierr = PetscContainerDestroy(&c); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 #undef __FUNCT__ 2456 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2457 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2458 { 2459 PetscFunctionBegin; 2460 PetscFunctionReturn(0); 2461 } 2462 2463 #undef __FUNCT__ 2464 #define __FUNCT__ "MatSolve_MPIAIJ" 2465 /* 2466 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2467 */ 2468 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2469 { 2470 namespace graph_dist = boost::graph::distributed; 2471 2472 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2473 lgraph_type *lgraph_p; 2474 PetscContainer c; 2475 PetscErrorCode ierr; 2476 2477 PetscFunctionBegin; 2478 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2479 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2480 ierr = VecCopy(b, x);CHKERRQ(ierr); 2481 2482 PetscScalar *array_x; 2483 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2484 PetscInt sx; 2485 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2486 2487 PetscScalar *array_b; 2488 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2489 PetscInt sb; 2490 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2491 2492 lgraph_type& level_graph = *lgraph_p; 2493 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2494 2495 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2496 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2497 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2498 2499 typedef boost::iterator_property_map<array_ref_type::iterator, 2500 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2501 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2502 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2503 2504 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2505 PetscFunctionReturn(0); 2506 } 2507 #endif 2508 2509 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2510 PetscInt nzlocal,nsends,nrecvs; 2511 PetscMPIInt *send_rank,*recv_rank; 2512 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2513 PetscScalar *sbuf_a,**rbuf_a; 2514 PetscErrorCode (*Destroy)(Mat); 2515 } Mat_Redundant; 2516 2517 #undef __FUNCT__ 2518 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2519 
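/*
   Destroy callback for the PetscContainer that MatGetRedundantMatrix_MPIAIJ() attaches to a
   redundant matrix for reuse: releases the send/receive rank lists and the index and value
   buffers stored in the Mat_Redundant struct.
*/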
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2520 { 2521 PetscErrorCode ierr; 2522 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2523 PetscInt i; 2524 2525 PetscFunctionBegin; 2526 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2527 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2528 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2529 for (i=0; i<redund->nrecvs; i++) { 2530 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2531 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2532 } 2533 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2534 ierr = PetscFree(redund);CHKERRQ(ierr); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 #undef __FUNCT__ 2539 #define __FUNCT__ "MatDestroy_MatRedundant" 2540 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2541 { 2542 PetscErrorCode ierr; 2543 PetscContainer container; 2544 Mat_Redundant *redund=NULL; 2545 2546 PetscFunctionBegin; 2547 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2548 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2549 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2550 2551 A->ops->destroy = redund->Destroy; 2552 2553 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2554 if (A->ops->destroy) { 2555 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2556 } 2557 PetscFunctionReturn(0); 2558 } 2559 2560 #undef __FUNCT__ 2561 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2562 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2563 { 2564 PetscMPIInt rank,size; 2565 MPI_Comm comm; 2566 PetscErrorCode ierr; 2567 PetscInt nsends = 0,nrecvs=0,i,rownz_max=0; 2568 PetscMPIInt *send_rank= NULL,*recv_rank=NULL; 2569 PetscInt *rowrange = mat->rmap->range; 2570 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2571 Mat A = aij->A,B=aij->B,C=*matredundant; 2572 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2573 PetscScalar *sbuf_a; 2574 PetscInt nzlocal=a->nz+b->nz; 2575 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2576 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2577 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2578 MatScalar *aworkA,*aworkB; 2579 PetscScalar *vals; 2580 PetscMPIInt tag1,tag2,tag3,imdex; 2581 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2582 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2583 MPI_Status recv_status,*send_status; 2584 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2585 PetscInt **rbuf_j=NULL; 2586 PetscScalar **rbuf_a=NULL; 2587 Mat_Redundant *redund =NULL; 2588 PetscContainer container; 2589 2590 PetscFunctionBegin; 2591 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2592 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2593 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2594 2595 if (reuse == MAT_REUSE_MATRIX) { 2596 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2597 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2598 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2599 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong local size"); 2600 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2601 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2602 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2603 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2604 2605 nsends = redund->nsends; 2606 nrecvs = redund->nrecvs; 2607 send_rank = redund->send_rank; 2608 recv_rank = redund->recv_rank; 2609 sbuf_nz = redund->sbuf_nz; 2610 rbuf_nz = redund->rbuf_nz; 2611 sbuf_j = redund->sbuf_j; 2612 sbuf_a = redund->sbuf_a; 2613 rbuf_j = redund->rbuf_j; 2614 rbuf_a = redund->rbuf_a; 2615 } 2616 2617 if (reuse == MAT_INITIAL_MATRIX) { 2618 PetscMPIInt subrank,subsize; 2619 PetscInt nleftover,np_subcomm; 2620 /* get the destination processors' id send_rank, nsends and nrecvs */ 2621 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2622 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2623 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2624 2625 np_subcomm = size/nsubcomm; 2626 nleftover = size - nsubcomm*np_subcomm; 2627 2628 nsends = 0; nrecvs = 0; 2629 for (i=0; i<size; i++) { /* i=rank*/ 2630 if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */ 2631 send_rank[nsends] = i; nsends++; 2632 recv_rank[nrecvs++] = i; 2633 } 2634 } 2635 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2636 i = size-nleftover-1; 2637 j = 0; 2638 while (j < nsubcomm - nleftover) { 2639 send_rank[nsends++] = i; 2640 i--; j++; 2641 } 2642 } 2643 2644 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2645 for (i=0; i<nleftover; i++) { 2646 recv_rank[nrecvs++] = size-nleftover+i; 2647 } 2648 } 2649 2650 /* allocate sbuf_j, sbuf_a */ 2651 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2652 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2653 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2654 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2655 2656 /* copy mat's local entries into the buffers */ 2657 if (reuse == MAT_INITIAL_MATRIX) { 2658 rownz_max = 0; 2659 rptr = sbuf_j; 2660 cols = sbuf_j + rend-rstart + 1; 2661 vals = sbuf_a; 2662 rptr[0] = 0; 2663 for (i=0; i<rend-rstart; i++) { 2664 row = i + rstart; 2665 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2666 ncols = nzA + nzB; 2667 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2668 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2669 /* load the column indices for this row into cols */ 2670 lwrite = 0; 2671 for (l=0; l<nzB; l++) { 2672 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2673 vals[lwrite] = aworkB[l]; 2674 cols[lwrite++] = ctmp; 2675 } 2676 } 2677 for (l=0; l<nzA; l++) { 2678 vals[lwrite] = aworkA[l]; 2679 cols[lwrite++] = cstart + cworkA[l]; 2680 } 2681 for (l=0; l<nzB; l++) { 2682 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2683 vals[lwrite] = aworkB[l]; 2684 cols[lwrite++] = ctmp; 2685 } 2686 } 2687 vals += ncols; 2688 cols += ncols; 2689 rptr[i+1] = rptr[i] + ncols; 2690 if (rownz_max < ncols) rownz_max = ncols; 2691 } 2692 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2693 } else { /* only copy matrix values into sbuf_a */ 2694 rptr = sbuf_j; 2695 vals = sbuf_a; 2696 rptr[0] = 0; 2697 for (i=0; 
i<rend-rstart; i++) { 2698 row = i + rstart; 2699 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2700 ncols = nzA + nzB; 2701 cworkB = b->j + b->i[i]; 2702 aworkA = a->a + a->i[i]; 2703 aworkB = b->a + b->i[i]; 2704 lwrite = 0; 2705 for (l=0; l<nzB; l++) { 2706 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2707 } 2708 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2709 for (l=0; l<nzB; l++) { 2710 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2711 } 2712 vals += ncols; 2713 rptr[i+1] = rptr[i] + ncols; 2714 } 2715 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2716 2717 /* send nzlocal to others, and recv other's nzlocal */ 2718 /*--------------------------------------------------*/ 2719 if (reuse == MAT_INITIAL_MATRIX) { 2720 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2721 2722 s_waits2 = s_waits3 + nsends; 2723 s_waits1 = s_waits2 + nsends; 2724 r_waits1 = s_waits1 + nsends; 2725 r_waits2 = r_waits1 + nrecvs; 2726 r_waits3 = r_waits2 + nrecvs; 2727 } else { 2728 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2729 2730 r_waits3 = s_waits3 + nsends; 2731 } 2732 2733 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2734 if (reuse == MAT_INITIAL_MATRIX) { 2735 /* get new tags to keep the communication clean */ 2736 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2737 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2738 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2739 2740 /* post receives of other's nzlocal */ 2741 for (i=0; i<nrecvs; i++) { 2742 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2743 } 2744 /* send nzlocal to others */ 2745 for (i=0; i<nsends; i++) { 2746 sbuf_nz[i] = nzlocal; 2747 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2748 } 2749 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2750 count = nrecvs; 2751 while (count) { 2752 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2753 2754 recv_rank[imdex] = recv_status.MPI_SOURCE; 2755 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2756 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2757 2758 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2759 2760 rbuf_nz[imdex] += i + 2; 2761 2762 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2763 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2764 count--; 2765 } 2766 /* wait on sends of nzlocal */ 2767 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2768 /* send mat->i,j to others, and recv from other's */ 2769 /*------------------------------------------------*/ 2770 for (i=0; i<nsends; i++) { 2771 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2772 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2773 } 2774 /* wait on receives of mat->i,j */ 2775 /*------------------------------*/ 2776 count = nrecvs; 2777 while (count) { 2778 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2779 if (recv_rank[imdex] != 
recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2780 count--; 2781 } 2782 /* wait on sends of mat->i,j */ 2783 /*---------------------------*/ 2784 if (nsends) { 2785 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2786 } 2787 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2788 2789 /* post receives, send and receive mat->a */ 2790 /*----------------------------------------*/ 2791 for (imdex=0; imdex<nrecvs; imdex++) { 2792 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2793 } 2794 for (i=0; i<nsends; i++) { 2795 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2796 } 2797 count = nrecvs; 2798 while (count) { 2799 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2800 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2801 count--; 2802 } 2803 if (nsends) { 2804 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2805 } 2806 2807 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2808 2809 /* create redundant matrix */ 2810 /*-------------------------*/ 2811 if (reuse == MAT_INITIAL_MATRIX) { 2812 /* compute rownz_max for preallocation */ 2813 for (imdex=0; imdex<nrecvs; imdex++) { 2814 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2815 rptr = rbuf_j[imdex]; 2816 for (i=0; i<j; i++) { 2817 ncols = rptr[i+1] - rptr[i]; 2818 if (rownz_max < ncols) rownz_max = ncols; 2819 } 2820 } 2821 2822 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2823 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2824 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2825 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2826 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2827 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2828 } else { 2829 C = *matredundant; 2830 } 2831 2832 /* insert local matrix entries */ 2833 rptr = sbuf_j; 2834 cols = sbuf_j + rend-rstart + 1; 2835 vals = sbuf_a; 2836 for (i=0; i<rend-rstart; i++) { 2837 row = i + rstart; 2838 ncols = rptr[i+1] - rptr[i]; 2839 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2840 vals += ncols; 2841 cols += ncols; 2842 } 2843 /* insert received matrix entries */ 2844 for (imdex=0; imdex<nrecvs; imdex++) { 2845 rstart = rowrange[recv_rank[imdex]]; 2846 rend = rowrange[recv_rank[imdex]+1]; 2847 rptr = rbuf_j[imdex]; 2848 cols = rbuf_j[imdex] + rend-rstart + 1; 2849 vals = rbuf_a[imdex]; 2850 for (i=0; i<rend-rstart; i++) { 2851 row = i + rstart; 2852 ncols = rptr[i+1] - rptr[i]; 2853 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2854 vals += ncols; 2855 cols += ncols; 2856 } 2857 } 2858 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2859 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2860 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2861 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2862 if (reuse == MAT_INITIAL_MATRIX) { 2863 PetscContainer container; 2864 *matredundant = C; 2865 /* create a supporting struct and attach it to C for reuse */ 2866 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2867 ierr = 
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2868 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2869 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2870 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2871 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2872 2873 redund->nzlocal = nzlocal; 2874 redund->nsends = nsends; 2875 redund->nrecvs = nrecvs; 2876 redund->send_rank = send_rank; 2877 redund->recv_rank = recv_rank; 2878 redund->sbuf_nz = sbuf_nz; 2879 redund->rbuf_nz = rbuf_nz; 2880 redund->sbuf_j = sbuf_j; 2881 redund->sbuf_a = sbuf_a; 2882 redund->rbuf_j = rbuf_j; 2883 redund->rbuf_a = rbuf_a; 2884 2885 redund->Destroy = C->ops->destroy; 2886 C->ops->destroy = MatDestroy_MatRedundant; 2887 } 2888 PetscFunctionReturn(0); 2889 } 2890 2891 #undef __FUNCT__ 2892 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2893 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2894 { 2895 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2896 PetscErrorCode ierr; 2897 PetscInt i,*idxb = 0; 2898 PetscScalar *va,*vb; 2899 Vec vtmp; 2900 2901 PetscFunctionBegin; 2902 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2903 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2904 if (idx) { 2905 for (i=0; i<A->rmap->n; i++) { 2906 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2907 } 2908 } 2909 2910 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2911 if (idx) { 2912 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2913 } 2914 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2915 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2916 2917 for (i=0; i<A->rmap->n; i++) { 2918 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2919 va[i] = vb[i]; 2920 if (idx) idx[i] = a->garray[idxb[i]]; 2921 } 2922 } 2923 2924 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2925 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2926 ierr = PetscFree(idxb);CHKERRQ(ierr); 2927 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2928 PetscFunctionReturn(0); 2929 } 2930 2931 #undef __FUNCT__ 2932 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2933 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2934 { 2935 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2936 PetscErrorCode ierr; 2937 PetscInt i,*idxb = 0; 2938 PetscScalar *va,*vb; 2939 Vec vtmp; 2940 2941 PetscFunctionBegin; 2942 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2943 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2944 if (idx) { 2945 for (i=0; i<A->cmap->n; i++) { 2946 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2947 } 2948 } 2949 2950 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2951 if (idx) { 2952 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2953 } 2954 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2955 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2956 2957 for (i=0; i<A->rmap->n; i++) { 2958 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2959 va[i] = vb[i]; 2960 if (idx) idx[i] = a->garray[idxb[i]]; 2961 } 2962 } 2963 2964 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2965 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2966 ierr = PetscFree(idxb);CHKERRQ(ierr); 2967 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2968 PetscFunctionReturn(0); 2969 } 2970 2971 #undef __FUNCT__ 2972 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2973 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2974 { 2975 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) 
A->data; 2976 PetscInt n = A->rmap->n; 2977 PetscInt cstart = A->cmap->rstart; 2978 PetscInt *cmap = mat->garray; 2979 PetscInt *diagIdx, *offdiagIdx; 2980 Vec diagV, offdiagV; 2981 PetscScalar *a, *diagA, *offdiagA; 2982 PetscInt r; 2983 PetscErrorCode ierr; 2984 2985 PetscFunctionBegin; 2986 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2987 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2988 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2989 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2990 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2991 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2992 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2993 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2994 for (r = 0; r < n; ++r) { 2995 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2996 a[r] = diagA[r]; 2997 idx[r] = cstart + diagIdx[r]; 2998 } else { 2999 a[r] = offdiagA[r]; 3000 idx[r] = cmap[offdiagIdx[r]]; 3001 } 3002 } 3003 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3004 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3005 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3006 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3007 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3008 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3009 PetscFunctionReturn(0); 3010 } 3011 3012 #undef __FUNCT__ 3013 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3014 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3015 { 3016 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3017 PetscInt n = A->rmap->n; 3018 PetscInt cstart = A->cmap->rstart; 3019 PetscInt *cmap = mat->garray; 3020 PetscInt *diagIdx, *offdiagIdx; 3021 Vec diagV, offdiagV; 3022 PetscScalar *a, *diagA, *offdiagA; 3023 PetscInt r; 3024 PetscErrorCode ierr; 3025 3026 PetscFunctionBegin; 3027 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3028 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3029 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3030 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3031 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3032 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3033 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3034 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3035 for (r = 0; r < n; ++r) { 3036 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3037 a[r] = diagA[r]; 3038 idx[r] = cstart + diagIdx[r]; 3039 } else { 3040 a[r] = offdiagA[r]; 3041 idx[r] = cmap[offdiagIdx[r]]; 3042 } 3043 } 3044 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3045 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3046 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3047 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3048 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3049 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3050 PetscFunctionReturn(0); 3051 } 3052 3053 #undef __FUNCT__ 3054 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3055 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3056 { 3057 PetscErrorCode ierr; 3058 Mat *dummy; 3059 3060 PetscFunctionBegin; 3061 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3062 *newmat = *dummy; 3063 ierr = PetscFree(dummy);CHKERRQ(ierr); 3064 PetscFunctionReturn(0); 3065 } 3066 3067 extern PetscErrorCode 
MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3068 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3071 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3072 { 3073 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3074 PetscErrorCode ierr; 3075 3076 PetscFunctionBegin; 3077 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3078 PetscFunctionReturn(0); 3079 } 3080 3081 #undef __FUNCT__ 3082 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3083 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3084 { 3085 PetscErrorCode ierr; 3086 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3087 3088 PetscFunctionBegin; 3089 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3090 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3091 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3092 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3093 PetscFunctionReturn(0); 3094 } 3095 3096 /* -------------------------------------------------------------------*/ 3097 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3098 MatGetRow_MPIAIJ, 3099 MatRestoreRow_MPIAIJ, 3100 MatMult_MPIAIJ, 3101 /* 4*/ MatMultAdd_MPIAIJ, 3102 MatMultTranspose_MPIAIJ, 3103 MatMultTransposeAdd_MPIAIJ, 3104 #if defined(PETSC_HAVE_PBGL) 3105 MatSolve_MPIAIJ, 3106 #else 3107 0, 3108 #endif 3109 0, 3110 0, 3111 /*10*/ 0, 3112 0, 3113 0, 3114 MatSOR_MPIAIJ, 3115 MatTranspose_MPIAIJ, 3116 /*15*/ MatGetInfo_MPIAIJ, 3117 MatEqual_MPIAIJ, 3118 MatGetDiagonal_MPIAIJ, 3119 MatDiagonalScale_MPIAIJ, 3120 MatNorm_MPIAIJ, 3121 /*20*/ MatAssemblyBegin_MPIAIJ, 3122 MatAssemblyEnd_MPIAIJ, 3123 MatSetOption_MPIAIJ, 3124 MatZeroEntries_MPIAIJ, 3125 /*24*/ MatZeroRows_MPIAIJ, 3126 0, 3127 #if defined(PETSC_HAVE_PBGL) 3128 0, 3129 #else 3130 0, 3131 #endif 3132 0, 3133 0, 3134 /*29*/ MatSetUp_MPIAIJ, 3135 #if defined(PETSC_HAVE_PBGL) 3136 0, 3137 #else 3138 0, 3139 #endif 3140 0, 3141 0, 3142 0, 3143 /*34*/ MatDuplicate_MPIAIJ, 3144 0, 3145 0, 3146 0, 3147 0, 3148 /*39*/ MatAXPY_MPIAIJ, 3149 MatGetSubMatrices_MPIAIJ, 3150 MatIncreaseOverlap_MPIAIJ, 3151 MatGetValues_MPIAIJ, 3152 MatCopy_MPIAIJ, 3153 /*44*/ MatGetRowMax_MPIAIJ, 3154 MatScale_MPIAIJ, 3155 0, 3156 0, 3157 MatZeroRowsColumns_MPIAIJ, 3158 /*49*/ MatSetRandom_MPIAIJ, 3159 0, 3160 0, 3161 0, 3162 0, 3163 /*54*/ MatFDColoringCreate_MPIAIJ, 3164 0, 3165 MatSetUnfactored_MPIAIJ, 3166 MatPermute_MPIAIJ, 3167 0, 3168 /*59*/ MatGetSubMatrix_MPIAIJ, 3169 MatDestroy_MPIAIJ, 3170 MatView_MPIAIJ, 3171 0, 3172 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3173 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3174 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3175 0, 3176 0, 3177 0, 3178 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3179 MatGetRowMinAbs_MPIAIJ, 3180 0, 3181 MatSetColoring_MPIAIJ, 3182 0, 3183 MatSetValuesAdifor_MPIAIJ, 3184 /*75*/ MatFDColoringApply_AIJ, 3185 0, 3186 0, 3187 0, 3188 MatFindZeroDiagonals_MPIAIJ, 3189 /*80*/ 0, 3190 0, 3191 0, 3192 /*83*/ MatLoad_MPIAIJ, 3193 0, 3194 0, 3195 0, 3196 0, 3197 0, 3198 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3199 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3200 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3201 MatPtAP_MPIAIJ_MPIAIJ, 3202 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3203 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3204 0, 3205 0, 3206 0, 3207 0, 3208 /*99*/ 0, 3209 0, 3210 0, 3211 MatConjugate_MPIAIJ, 3212 0, 3213 /*104*/MatSetValuesRow_MPIAIJ, 3214 MatRealPart_MPIAIJ, 3215 MatImaginaryPart_MPIAIJ, 3216 0, 3217 0, 3218 /*109*/0, 3219 MatGetRedundantMatrix_MPIAIJ, 3220 MatGetRowMin_MPIAIJ, 3221 0, 3222 0, 
3223 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3224 0, 3225 0, 3226 0, 3227 0, 3228 /*119*/0, 3229 0, 3230 0, 3231 0, 3232 MatGetMultiProcBlock_MPIAIJ, 3233 /*124*/MatFindNonzeroRows_MPIAIJ, 3234 MatGetColumnNorms_MPIAIJ, 3235 MatInvertBlockDiagonal_MPIAIJ, 3236 0, 3237 MatGetSubMatricesParallel_MPIAIJ, 3238 /*129*/0, 3239 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3240 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3241 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3242 0, 3243 /*134*/0, 3244 0, 3245 0, 3246 0, 3247 0, 3248 /*139*/0, 3249 0 3250 }; 3251 3252 /* ----------------------------------------------------------------------------------------*/ 3253 3254 #undef __FUNCT__ 3255 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3256 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3257 { 3258 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3259 PetscErrorCode ierr; 3260 3261 PetscFunctionBegin; 3262 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3263 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3264 PetscFunctionReturn(0); 3265 } 3266 3267 #undef __FUNCT__ 3268 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3269 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3270 { 3271 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3272 PetscErrorCode ierr; 3273 3274 PetscFunctionBegin; 3275 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3276 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3277 PetscFunctionReturn(0); 3278 } 3279 3280 #undef __FUNCT__ 3281 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3282 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3283 { 3284 Mat_MPIAIJ *b; 3285 PetscErrorCode ierr; 3286 3287 PetscFunctionBegin; 3288 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3289 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3290 b = (Mat_MPIAIJ*)B->data; 3291 3292 if (!B->preallocated) { 3293 /* Explicitly create 2 MATSEQAIJ matrices. 
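Here b->A holds the "diagonal" block (this process's rows paired with its own column range) and b->B holds the "off-diagonal" block. Note that b->B is created below with the full global column width (B->cmap->N); during assembly its column space is compressed to only the columns that actually contain nonzeros, with the corresponding global column numbers recorded in garray.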
*/ 3294 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3295 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3296 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3297 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3298 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3299 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3300 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3301 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3302 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3303 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3304 } 3305 3306 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3307 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3308 B->preallocated = PETSC_TRUE; 3309 PetscFunctionReturn(0); 3310 } 3311 3312 #undef __FUNCT__ 3313 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3314 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3315 { 3316 Mat mat; 3317 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3318 PetscErrorCode ierr; 3319 3320 PetscFunctionBegin; 3321 *newmat = 0; 3322 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3323 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3324 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3325 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3326 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3327 a = (Mat_MPIAIJ*)mat->data; 3328 3329 mat->factortype = matin->factortype; 3330 mat->rmap->bs = matin->rmap->bs; 3331 mat->cmap->bs = matin->cmap->bs; 3332 mat->assembled = PETSC_TRUE; 3333 mat->insertmode = NOT_SET_VALUES; 3334 mat->preallocated = PETSC_TRUE; 3335 3336 a->size = oldmat->size; 3337 a->rank = oldmat->rank; 3338 a->donotstash = oldmat->donotstash; 3339 a->roworiented = oldmat->roworiented; 3340 a->rowindices = 0; 3341 a->rowvalues = 0; 3342 a->getrowactive = PETSC_FALSE; 3343 3344 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3345 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3346 3347 if (oldmat->colmap) { 3348 #if defined(PETSC_USE_CTABLE) 3349 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3350 #else 3351 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3352 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3353 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3354 #endif 3355 } else a->colmap = 0; 3356 if (oldmat->garray) { 3357 PetscInt len; 3358 len = oldmat->B->cmap->n; 3359 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3360 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3361 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3362 } else a->garray = 0; 3363 3364 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3365 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3366 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3367 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3368 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3369 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3370 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3371 ierr = 
PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3372 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3373 *newmat = mat; 3374 PetscFunctionReturn(0); 3375 } 3376 3377 3378 3379 #undef __FUNCT__ 3380 #define __FUNCT__ "MatLoad_MPIAIJ" 3381 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3382 { 3383 PetscScalar *vals,*svals; 3384 MPI_Comm comm; 3385 PetscErrorCode ierr; 3386 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3387 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3388 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3389 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3390 PetscInt cend,cstart,n,*rowners,sizesset=1; 3391 int fd; 3392 PetscInt bs = 1; 3393 3394 PetscFunctionBegin; 3395 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3396 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3397 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3398 if (!rank) { 3399 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3400 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3401 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3402 } 3403 3404 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3405 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3406 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3407 3408 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3409 3410 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3411 M = header[1]; N = header[2]; 3412 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3413 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3414 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3415 3416 /* If global sizes are set, check if they are consistent with that given in the file */ 3417 if (sizesset) { 3418 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3419 } 3420 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3421 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3422 3423 /* determine ownership of all (block) rows */ 3424 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3425 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3426 else m = newMat->rmap->n; /* Set by user */ 3427 3428 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3429 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3430 3431 /* First process needs enough room for process with most rows */ 3432 if (!rank) { 3433 mmax = rowners[1]; 3434 for (i=2; i<=size; i++) { 3435 mmax = PetscMax(mmax, rowners[i]); 3436 } 3437 } else mmax = -1; /* unused, but compilers complain */ 3438 3439 rowners[0] = 0; 3440 for (i=2; i<=size; i++) { 3441 rowners[i] += rowners[i-1]; 3442 } 3443 rstart = rowners[rank]; 3444 rend = rowners[rank+1]; 3445 3446 /* distribute row lengths to all processors */ 3447 ierr = 
PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3448 if (!rank) { 3449 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3450 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3451 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3452 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3453 for (j=0; j<m; j++) { 3454 procsnz[0] += ourlens[j]; 3455 } 3456 for (i=1; i<size; i++) { 3457 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3458 /* calculate the number of nonzeros on each processor */ 3459 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3460 procsnz[i] += rowlengths[j]; 3461 } 3462 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3463 } 3464 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3465 } else { 3466 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3467 } 3468 3469 if (!rank) { 3470 /* determine max buffer needed and allocate it */ 3471 maxnz = 0; 3472 for (i=0; i<size; i++) { 3473 maxnz = PetscMax(maxnz,procsnz[i]); 3474 } 3475 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3476 3477 /* read in my part of the matrix column indices */ 3478 nz = procsnz[0]; 3479 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3480 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3481 3482 /* read in every one elses and ship off */ 3483 for (i=1; i<size; i++) { 3484 nz = procsnz[i]; 3485 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3486 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3487 } 3488 ierr = PetscFree(cols);CHKERRQ(ierr); 3489 } else { 3490 /* determine buffer space needed for message */ 3491 nz = 0; 3492 for (i=0; i<m; i++) { 3493 nz += ourlens[i]; 3494 } 3495 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3496 3497 /* receive message of column indices*/ 3498 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3499 } 3500 3501 /* determine column ownership if matrix is not square */ 3502 if (N != M) { 3503 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3504 else n = newMat->cmap->n; 3505 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3506 cstart = cend - n; 3507 } else { 3508 cstart = rstart; 3509 cend = rend; 3510 n = cend - cstart; 3511 } 3512 3513 /* loop over local rows, determining number of off diagonal entries */ 3514 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3515 jj = 0; 3516 for (i=0; i<m; i++) { 3517 for (j=0; j<ourlens[i]; j++) { 3518 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3519 jj++; 3520 } 3521 } 3522 3523 for (i=0; i<m; i++) { 3524 ourlens[i] -= offlens[i]; 3525 } 3526 if (!sizesset) { 3527 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3528 } 3529 3530 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3531 3532 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3533 3534 for (i=0; i<m; i++) { 3535 ourlens[i] += offlens[i]; 3536 } 3537 3538 if (!rank) { 3539 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3540 3541 /* read in my part of the matrix numerical values */ 3542 nz = procsnz[0]; 3543 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3544 3545 /* insert into matrix */ 3546 jj = rstart; 3547 smycols = mycols; 3548 svals = vals; 3549 for (i=0; i<m; i++) { 3550 ierr = 
MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3551 smycols += ourlens[i]; 3552 svals += ourlens[i]; 3553 jj++; 3554 } 3555 3556 /* read in other processors and ship out */ 3557 for (i=1; i<size; i++) { 3558 nz = procsnz[i]; 3559 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3560 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3561 } 3562 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3563 } else { 3564 /* receive numeric values */ 3565 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3566 3567 /* receive message of values*/ 3568 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3569 3570 /* insert into matrix */ 3571 jj = rstart; 3572 smycols = mycols; 3573 svals = vals; 3574 for (i=0; i<m; i++) { 3575 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3576 smycols += ourlens[i]; 3577 svals += ourlens[i]; 3578 jj++; 3579 } 3580 } 3581 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3582 ierr = PetscFree(vals);CHKERRQ(ierr); 3583 ierr = PetscFree(mycols);CHKERRQ(ierr); 3584 ierr = PetscFree(rowners);CHKERRQ(ierr); 3585 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3586 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3587 PetscFunctionReturn(0); 3588 } 3589 3590 #undef __FUNCT__ 3591 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3592 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3593 { 3594 PetscErrorCode ierr; 3595 IS iscol_local; 3596 PetscInt csize; 3597 3598 PetscFunctionBegin; 3599 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3600 if (call == MAT_REUSE_MATRIX) { 3601 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3602 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3603 } else { 3604 PetscInt cbs; 3605 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3606 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3607 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3608 } 3609 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3610 if (call == MAT_INITIAL_MATRIX) { 3611 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3612 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3613 } 3614 PetscFunctionReturn(0); 3615 } 3616 3617 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3618 #undef __FUNCT__ 3619 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3620 /* 3621 Not great since it makes two copies of the submatrix, first an SeqAIJ 3622 in local and then by concatenating the local matrices the end result. 3623 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3624 3625 Note: This requires a sequential iscol with all indices. 
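
      A typical calling sequence from user code (illustrative only; the variable names below are not from this file) is

         MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);  - builds sub and caches the gathered iscol ("ISAllGather")
                                                                      and the local piece ("SubMatrix") on it
         MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);    - refills sub in place from the cached objects

      which is why the MAT_REUSE_MATRIX path below requires that the matrix passed in was produced by an
      earlier MAT_INITIAL_MATRIX call.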
3626 */ 3627 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3628 { 3629 PetscErrorCode ierr; 3630 PetscMPIInt rank,size; 3631 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3632 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3633 PetscBool allcolumns, colflag; 3634 Mat M,Mreuse; 3635 MatScalar *vwork,*aa; 3636 MPI_Comm comm; 3637 Mat_SeqAIJ *aij; 3638 3639 PetscFunctionBegin; 3640 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3641 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3642 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3643 3644 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3645 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3646 if (colflag && ncol == mat->cmap->N) { 3647 allcolumns = PETSC_TRUE; 3648 } else { 3649 allcolumns = PETSC_FALSE; 3650 } 3651 if (call == MAT_REUSE_MATRIX) { 3652 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3653 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3654 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3655 } else { 3656 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3657 } 3658 3659 /* 3660 m - number of local rows 3661 n - number of columns (same on all processors) 3662 rstart - first row in new global matrix generated 3663 */ 3664 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3665 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3666 if (call == MAT_INITIAL_MATRIX) { 3667 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3668 ii = aij->i; 3669 jj = aij->j; 3670 3671 /* 3672 Determine the number of non-zeros in the diagonal and off-diagonal 3673 portions of the matrix in order to do correct preallocation 3674 */ 3675 3676 /* first get start and end of "diagonal" columns */ 3677 if (csize == PETSC_DECIDE) { 3678 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3679 if (mglobal == n) { /* square matrix */ 3680 nlocal = m; 3681 } else { 3682 nlocal = n/size + ((n % size) > rank); 3683 } 3684 } else { 3685 nlocal = csize; 3686 } 3687 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3688 rstart = rend - nlocal; 3689 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3690 3691 /* next, compute all the lengths */ 3692 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3693 olens = dlens + m; 3694 for (i=0; i<m; i++) { 3695 jend = ii[i+1] - ii[i]; 3696 olen = 0; 3697 dlen = 0; 3698 for (j=0; j<jend; j++) { 3699 if (*jj < rstart || *jj >= rend) olen++; 3700 else dlen++; 3701 jj++; 3702 } 3703 olens[i] = olen; 3704 dlens[i] = dlen; 3705 } 3706 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3707 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3708 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3709 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3710 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3711 ierr = PetscFree(dlens);CHKERRQ(ierr); 3712 } else { 3713 PetscInt ml,nl; 3714 3715 M = *newmat; 3716 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3717 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3718 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3719 /* 
3720 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3721 rather than the slower MatSetValues(). 3722 */ 3723 M->was_assembled = PETSC_TRUE; 3724 M->assembled = PETSC_FALSE; 3725 } 3726 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3727 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3728 ii = aij->i; 3729 jj = aij->j; 3730 aa = aij->a; 3731 for (i=0; i<m; i++) { 3732 row = rstart + i; 3733 nz = ii[i+1] - ii[i]; 3734 cwork = jj; jj += nz; 3735 vwork = aa; aa += nz; 3736 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3737 } 3738 3739 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3740 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3741 *newmat = M; 3742 3743 /* save submatrix used in processor for next request */ 3744 if (call == MAT_INITIAL_MATRIX) { 3745 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3746 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3747 } 3748 PetscFunctionReturn(0); 3749 } 3750 3751 #undef __FUNCT__ 3752 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3753 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3754 { 3755 PetscInt m,cstart, cend,j,nnz,i,d; 3756 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3757 const PetscInt *JJ; 3758 PetscScalar *values; 3759 PetscErrorCode ierr; 3760 3761 PetscFunctionBegin; 3762 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3763 3764 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3765 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3766 m = B->rmap->n; 3767 cstart = B->cmap->rstart; 3768 cend = B->cmap->rend; 3769 rstart = B->rmap->rstart; 3770 3771 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3772 3773 #if defined(PETSC_USE_DEBUG) 3774 for (i=0; i<m; i++) { 3775 nnz = Ii[i+1]- Ii[i]; 3776 JJ = J + Ii[i]; 3777 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3778 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3779 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3780 } 3781 #endif 3782 3783 for (i=0; i<m; i++) { 3784 nnz = Ii[i+1]- Ii[i]; 3785 JJ = J + Ii[i]; 3786 nnz_max = PetscMax(nnz_max,nnz); 3787 d = 0; 3788 for (j=0; j<nnz; j++) { 3789 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3790 } 3791 d_nnz[i] = d; 3792 o_nnz[i] = nnz - d; 3793 } 3794 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3795 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3796 3797 if (v) values = (PetscScalar*)v; 3798 else { 3799 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3800 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3801 } 3802 3803 for (i=0; i<m; i++) { 3804 ii = i + rstart; 3805 nnz = Ii[i+1]- Ii[i]; 3806 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3807 } 3808 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3809 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3810 3811 if (!v) { 3812 ierr = PetscFree(values);CHKERRQ(ierr); 3813 } 3814 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3815 PetscFunctionReturn(0); 3816 } 3817 3818 #undef __FUNCT__ 3819 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3820 /*@ 3821 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3822 (the default parallel PETSc format). 3823 3824 Collective on MPI_Comm 3825 3826 Input Parameters: 3827 + B - the matrix 3828 . i - the indices into j for the start of each local row (starts with zero) 3829 . j - the column indices for each local row (starts with zero) 3830 - v - optional values in the matrix 3831 3832 Level: developer 3833 3834 Notes: 3835 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3836 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3837 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3838 3839 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3840 3841 The format which is used for the sparse matrix input, is equivalent to a 3842 row-major ordering.. i.e for the following matrix, the input data expected is 3843 as shown: 3844 3845 1 0 0 3846 2 0 3 P0 3847 ------- 3848 4 5 6 P1 3849 3850 Process0 [P0]: rows_owned=[0,1] 3851 i = {0,1,3} [size = nrow+1 = 2+1] 3852 j = {0,0,2} [size = nz = 6] 3853 v = {1,2,3} [size = nz = 6] 3854 3855 Process1 [P1]: rows_owned=[2] 3856 i = {0,3} [size = nrow+1 = 1+1] 3857 j = {0,1,2} [size = nz = 6] 3858 v = {4,5,6} [size = nz = 6] 3859 3860 .keywords: matrix, aij, compressed row, sparse, parallel 3861 3862 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3863 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3864 @*/ 3865 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3866 { 3867 PetscErrorCode ierr; 3868 3869 PetscFunctionBegin; 3870 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3871 PetscFunctionReturn(0); 3872 } 3873 3874 #undef __FUNCT__ 3875 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3876 /*@C 3877 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3878 (the default parallel PETSc format). For good matrix assembly performance 3879 the user should preallocate the matrix storage by setting the parameters 3880 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3881 performance can be increased by more than a factor of 50. 3882 3883 Collective on MPI_Comm 3884 3885 Input Parameters: 3886 + A - the matrix 3887 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3888 (same value is used for all local rows) 3889 . d_nnz - array containing the number of nonzeros in the various rows of the 3890 DIAGONAL portion of the local submatrix (possibly different for each row) 3891 or NULL, if d_nz is used to specify the nonzero structure. 3892 The size of this array is equal to the number of local rows, i.e 'm'. 
3893 For matrices that will be factored, you must leave room for (and set) 3894 the diagonal entry even if it is zero. 3895 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3896 submatrix (same value is used for all local rows). 3897 - o_nnz - array containing the number of nonzeros in the various rows of the 3898 OFF-DIAGONAL portion of the local submatrix (possibly different for 3899 each row) or NULL, if o_nz is used to specify the nonzero 3900 structure. The size of this array is equal to the number 3901 of local rows, i.e. 'm'. 3902 3903 If the *_nnz parameter is given then the *_nz parameter is ignored. 3904 3905 The AIJ format (also called the Yale sparse matrix format or 3906 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3907 storage. The stored row and column indices begin with zero. 3908 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3909 3910 The parallel matrix is partitioned such that the first m0 rows belong to 3911 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3912 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 3913 3914 The DIAGONAL portion of the local submatrix of a processor can be defined 3915 as the submatrix which is obtained by extracting the part corresponding to 3916 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3917 first row that belongs to the processor, r2 is the last row belonging to 3918 this processor, and c1-c2 is the range of indices of the local part of a 3919 vector suitable for applying the matrix to. This is an mxn matrix. In the 3920 common case of a square matrix, the row and column ranges are the same and 3921 the DIAGONAL part is also square. The remaining portion of the local 3922 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3923 3924 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3925 3926 You can call MatGetInfo() to get information on how effective the preallocation was; 3927 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3928 You can also run with the option -info and look for messages with the string 3929 malloc in them to see if additional memory allocation was needed. 3930 3931 Example usage: 3932 3933 Consider the following 8x8 matrix with 34 non-zero values, that is 3934 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3935 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3936 as follows: 3937 3938 .vb 3939 1 2 0 | 0 3 0 | 0 4 3940 Proc0 0 5 6 | 7 0 0 | 8 0 3941 9 0 10 | 11 0 0 | 12 0 3942 ------------------------------------- 3943 13 0 14 | 15 16 17 | 0 0 3944 Proc1 0 18 0 | 19 20 21 | 0 0 3945 0 0 0 | 22 23 0 | 24 0 3946 ------------------------------------- 3947 Proc2 25 26 27 | 0 0 28 | 29 0 3948 30 0 0 | 31 32 33 | 0 34 3949 .ve 3950 3951 This can be represented as a collection of submatrices as: 3952 3953 .vb 3954 A B C 3955 D E F 3956 G H I 3957 .ve 3958 3959 Where the submatrices A,B,C are owned by proc0, D,E,F are 3960 owned by proc1, G,H,I are owned by proc2. 3961 3962 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3963 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3964 The 'M','N' parameters are 8,8, and have the same values on all procs. 3965 3966 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3967 submatrices [A], [E], [I] respectively.
The OFF-DIAGONAL submatrices 3968 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3969 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3970 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ 3971 matrix, and [DF] as another SeqAIJ matrix. 3972 3973 When d_nz, o_nz parameters are specified, d_nz storage elements are 3974 allocated for every row of the local diagonal submatrix, and o_nz 3975 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3976 One way to choose d_nz and o_nz is to use the max nonzeros per local 3977 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3978 In this case, the values of d_nz,o_nz are: 3979 .vb 3980 proc0 : d_nz = 2, o_nz = 2 3981 proc1 : d_nz = 3, o_nz = 2 3982 proc2 : d_nz = 1, o_nz = 4 3983 .ve 3984 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3985 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3986 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3987 34 values. 3988 3989 When d_nnz, o_nnz parameters are specified, the storage is specified 3990 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3991 In the above case the values for d_nnz,o_nnz are: 3992 .vb 3993 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3994 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3995 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3996 .ve 3997 Here the space allocated is the sum of all the above values, i.e. 34, and 3998 hence preallocation is perfect. 3999 4000 Level: intermediate 4001 4002 .keywords: matrix, aij, compressed row, sparse, parallel 4003 4004 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4005 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4006 @*/ 4007 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4008 { 4009 PetscErrorCode ierr; 4010 4011 PetscFunctionBegin; 4012 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4013 PetscValidType(B,1); 4014 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4015 PetscFunctionReturn(0); 4016 } 4017 4018 #undef __FUNCT__ 4019 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4020 /*@ 4021 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain, in standard 4022 CSR format, the local rows. 4023 4024 Collective on MPI_Comm 4025 4026 Input Parameters: 4027 + comm - MPI communicator 4028 . m - number of local rows (Cannot be PETSC_DECIDE) 4029 . n - This value should be the same as the local size used in creating the 4030 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4031 calculated if N is given) For square matrices n is almost always m. 4032 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4033 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4034 . i - row indices 4035 . j - column indices 4036 - a - matrix values 4037 4038 Output Parameter: 4039 . mat - the matrix 4040 4041 Level: intermediate 4042 4043 Notes: 4044 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4045 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4046 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
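
       As a minimal calling sketch (the sizes and values here are illustrative only and are not taken from the
       example below), each process passes just its own rows in CSR form:

.vb
       PetscInt    i[] = {0,2,5};                 /* 2 local rows, so i has m+1 = 3 entries   */
       PetscInt    j[] = {0,3,1,2,4};             /* global column indices, row by row        */
       PetscScalar a[] = {1.0,2.0,3.0,4.0,5.0};   /* values, same length as j                 */
       Mat         A;
       ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,5,i,j,a,&A);CHKERRQ(ierr);
.ve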
4047 4048 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4049 4050 The format which is used for the sparse matrix input, is equivalent to a 4051 row-major ordering.. i.e for the following matrix, the input data expected is 4052 as shown: 4053 4054 1 0 0 4055 2 0 3 P0 4056 ------- 4057 4 5 6 P1 4058 4059 Process0 [P0]: rows_owned=[0,1] 4060 i = {0,1,3} [size = nrow+1 = 2+1] 4061 j = {0,0,2} [size = nz = 6] 4062 v = {1,2,3} [size = nz = 6] 4063 4064 Process1 [P1]: rows_owned=[2] 4065 i = {0,3} [size = nrow+1 = 1+1] 4066 j = {0,1,2} [size = nz = 6] 4067 v = {4,5,6} [size = nz = 6] 4068 4069 .keywords: matrix, aij, compressed row, sparse, parallel 4070 4071 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4072 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4073 @*/ 4074 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4075 { 4076 PetscErrorCode ierr; 4077 4078 PetscFunctionBegin; 4079 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4080 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4081 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4082 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4083 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4084 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4085 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4086 PetscFunctionReturn(0); 4087 } 4088 4089 #undef __FUNCT__ 4090 #define __FUNCT__ "MatCreateAIJ" 4091 /*@C 4092 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4093 (the default parallel PETSc format). For good matrix assembly performance 4094 the user should preallocate the matrix storage by setting the parameters 4095 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4096 performance can be increased by more than a factor of 50. 4097 4098 Collective on MPI_Comm 4099 4100 Input Parameters: 4101 + comm - MPI communicator 4102 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4103 This value should be the same as the local size used in creating the 4104 y vector for the matrix-vector product y = Ax. 4105 . n - This value should be the same as the local size used in creating the 4106 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4107 calculated if N is given) For square matrices n is almost always m. 4108 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4109 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4110 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4111 (same value is used for all local rows) 4112 . d_nnz - array containing the number of nonzeros in the various rows of the 4113 DIAGONAL portion of the local submatrix (possibly different for each row) 4114 or NULL, if d_nz is used to specify the nonzero structure. 4115 The size of this array is equal to the number of local rows, i.e 'm'. 4116 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4117 submatrix (same value is used for all local rows). 
4118 - o_nnz - array containing the number of nonzeros in the various rows of the 4119 OFF-DIAGONAL portion of the local submatrix (possibly different for 4120 each row) or NULL, if o_nz is used to specify the nonzero 4121 structure. The size of this array is equal to the number 4122 of local rows, i.e. 'm'. 4123 4124 Output Parameter: 4125 . A - the matrix 4126 4127 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4128 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4129 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4130 4131 Notes: 4132 If the *_nnz parameter is given then the *_nz parameter is ignored. 4133 4134 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4135 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4136 storage requirements for this matrix. 4137 4138 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4139 processor then it must be used on all processors that share the object for 4140 that argument. 4141 4142 The user MUST specify either the local or global matrix dimensions 4143 (possibly both). 4144 4145 The parallel matrix is partitioned across processors such that the 4146 first m0 rows belong to process 0, the next m1 rows belong to 4147 process 1, the next m2 rows belong to process 2, etc., where 4148 m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores 4149 values corresponding to an [m x N] submatrix. 4150 4151 The columns are logically partitioned with the n0 columns belonging 4152 to the 0th partition, the next n1 columns belonging to the next 4153 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4154 4155 The DIAGONAL portion of the local submatrix on any given processor 4156 is the submatrix corresponding to the rows and columns m,n 4157 corresponding to the given processor, i.e. the diagonal matrix on 4158 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4159 etc. The remaining portion of the local submatrix [m x (N-n)] 4160 constitutes the OFF-DIAGONAL portion. The example below better 4161 illustrates this concept. 4162 4163 For a square global matrix we define each processor's diagonal portion 4164 to be its local rows and the corresponding columns (a square submatrix); 4165 each processor's off-diagonal portion encompasses the remainder of the 4166 local matrix (a rectangular submatrix). 4167 4168 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4169 4170 When calling this routine with a single process communicator, a matrix of 4171 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4172 type of communicator, use the construction mechanism: 4173 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4174 4175 By default, this format uses inodes (identical nodes) when possible. 4176 We search for consecutive rows with the same nonzero structure, thereby 4177 reusing matrix information to achieve increased efficiency. 4178 4179 Options Database Keys: 4180 + -mat_no_inode - Do not use inodes 4181 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4182 - -mat_aij_oneindex - Internally use indexing starting at 1 4183 rather than 0. Note that when calling MatSetValues(), 4184 the user still MUST index entries starting at 0! 4185 4186 4187 Example usage: 4188 4189 Consider the following 8x8 matrix with 34 non-zero values, that is 4190 assembled across 3 processors.
Let's assume that proc0 owns 3 rows, 4191 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4192 as follows: 4193 4194 .vb 4195 1 2 0 | 0 3 0 | 0 4 4196 Proc0 0 5 6 | 7 0 0 | 8 0 4197 9 0 10 | 11 0 0 | 12 0 4198 ------------------------------------- 4199 13 0 14 | 15 16 17 | 0 0 4200 Proc1 0 18 0 | 19 20 21 | 0 0 4201 0 0 0 | 22 23 0 | 24 0 4202 ------------------------------------- 4203 Proc2 25 26 27 | 0 0 28 | 29 0 4204 30 0 0 | 31 32 33 | 0 34 4205 .ve 4206 4207 This can be represented as a collection of submatrices as: 4208 4209 .vb 4210 A B C 4211 D E F 4212 G H I 4213 .ve 4214 4215 Where the submatrices A,B,C are owned by proc0, D,E,F are 4216 owned by proc1, G,H,I are owned by proc2. 4217 4218 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4219 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4220 The 'M','N' parameters are 8,8, and have the same values on all procs. 4221 4222 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4223 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4224 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4225 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4226 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ 4227 matrix, and [DF] as another SeqAIJ matrix. 4228 4229 When d_nz, o_nz parameters are specified, d_nz storage elements are 4230 allocated for every row of the local diagonal submatrix, and o_nz 4231 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4232 One way to choose d_nz and o_nz is to use the max nonzeros per local 4233 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4234 In this case, the values of d_nz,o_nz are: 4235 .vb 4236 proc0 : d_nz = 2, o_nz = 2 4237 proc1 : d_nz = 3, o_nz = 2 4238 proc2 : d_nz = 1, o_nz = 4 4239 .ve 4240 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4241 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4242 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4243 34 values. 4244 4245 When d_nnz, o_nnz parameters are specified, the storage is specified 4246 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4247 In the above case the values for d_nnz,o_nnz are: 4248 .vb 4249 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4250 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4251 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4252 .ve 4253 Here the space allocated is the sum of all the above values, i.e. 34, and 4254 hence preallocation is perfect.
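
   As a sketch, the matrix above could be created either directly with this routine or with the recommended
   MatCreate()/MatSetType()/MatXXXXSetPreallocation() sequence (m, d_nnz and o_nnz here stand for the
   per-process values listed above; the variable names are illustrative only):

.vb
      Mat A;
      ierr = MatCreateAIJ(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);

      /* or, equivalently */
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,0,d_nnz);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve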
4255 4256 Level: intermediate 4257 4258 .keywords: matrix, aij, compressed row, sparse, parallel 4259 4260 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4261 MPIAIJ, MatCreateMPIAIJWithArrays() 4262 @*/ 4263 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4264 { 4265 PetscErrorCode ierr; 4266 PetscMPIInt size; 4267 4268 PetscFunctionBegin; 4269 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4270 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4271 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4272 if (size > 1) { 4273 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4274 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4275 } else { 4276 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4277 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4278 } 4279 PetscFunctionReturn(0); 4280 } 4281 4282 #undef __FUNCT__ 4283 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4284 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4285 { 4286 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4287 4288 PetscFunctionBegin; 4289 *Ad = a->A; 4290 *Ao = a->B; 4291 *colmap = a->garray; 4292 PetscFunctionReturn(0); 4293 } 4294 4295 #undef __FUNCT__ 4296 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4297 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4298 { 4299 PetscErrorCode ierr; 4300 PetscInt i; 4301 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4302 4303 PetscFunctionBegin; 4304 if (coloring->ctype == IS_COLORING_GLOBAL) { 4305 ISColoringValue *allcolors,*colors; 4306 ISColoring ocoloring; 4307 4308 /* set coloring for diagonal portion */ 4309 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4310 4311 /* set coloring for off-diagonal portion */ 4312 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4313 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4314 for (i=0; i<a->B->cmap->n; i++) { 4315 colors[i] = allcolors[a->garray[i]]; 4316 } 4317 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4318 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4319 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4320 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4321 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4322 ISColoringValue *colors; 4323 PetscInt *larray; 4324 ISColoring ocoloring; 4325 4326 /* set coloring for diagonal portion */ 4327 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4328 for (i=0; i<a->A->cmap->n; i++) { 4329 larray[i] = i + A->cmap->rstart; 4330 } 4331 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4332 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4333 for (i=0; i<a->A->cmap->n; i++) { 4334 colors[i] = coloring->colors[larray[i]]; 4335 } 4336 ierr = PetscFree(larray);CHKERRQ(ierr); 4337 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4338 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4339 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4340 4341 /* set coloring for off-diagonal portion */ 4342 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4343 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4344 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4345 for (i=0; i<a->B->cmap->n; i++) { 4346 colors[i] = coloring->colors[larray[i]]; 4347 } 4348 ierr = PetscFree(larray);CHKERRQ(ierr); 4349 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4350 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4351 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4352 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4353 PetscFunctionReturn(0); 4354 } 4355 4356 #undef __FUNCT__ 4357 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4358 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4359 { 4360 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4361 PetscErrorCode ierr; 4362 4363 PetscFunctionBegin; 4364 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4365 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4366 PetscFunctionReturn(0); 4367 } 4368 4369 #undef __FUNCT__ 4370 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4371 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4372 { 4373 PetscErrorCode ierr; 4374 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4375 PetscInt *indx; 4376 4377 PetscFunctionBegin; 4378 /* This routine will ONLY return MPIAIJ type matrix */ 4379 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4380 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4381 if (n == PETSC_DECIDE) { 4382 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4383 } 4384 /* Check sum(n) = N */ 4385 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4386 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4387 4388 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4389 rstart -= m; 4390 4391 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4392 for (i=0; i<m; i++) { 4393 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4394 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4395 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4396 } 4397 4398 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4399 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4400 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4401 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4402 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4403 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4404 PetscFunctionReturn(0); 4405 } 4406 4407 #undef __FUNCT__ 4408 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4409 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4410 { 4411 PetscErrorCode ierr; 4412 PetscInt m,N,i,rstart,nnz,Ii; 4413 PetscInt *indx; 4414 PetscScalar *values; 4415 4416 PetscFunctionBegin; 4417 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4418 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4419 for (i=0; i<m; i++) { 4420 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4421 Ii = i + rstart; 4422 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4423 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4424 } 4425 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4426 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4427 PetscFunctionReturn(0); 4428 } 4429 4430 #undef __FUNCT__ 4431 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4432 /*@ 4433 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4434 matrices from each processor 4435 4436 Collective on MPI_Comm 4437 4438 Input Parameters: 4439 + comm - the communicators the parallel matrix will live on 4440 . inmat - the input sequential matrices 4441 . n - number of local columns (or PETSC_DECIDE) 4442 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4443 4444 Output Parameter: 4445 . outmat - the parallel matrix generated 4446 4447 Level: advanced 4448 4449 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4450 4451 @*/ 4452 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4453 { 4454 PetscErrorCode ierr; 4455 4456 PetscFunctionBegin; 4457 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4458 if (scall == MAT_INITIAL_MATRIX) { 4459 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4460 } 4461 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4462 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4463 PetscFunctionReturn(0); 4464 } 4465 4466 #undef __FUNCT__ 4467 #define __FUNCT__ "MatFileSplit" 4468 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4469 { 4470 PetscErrorCode ierr; 4471 PetscMPIInt rank; 4472 PetscInt m,N,i,rstart,nnz; 4473 size_t len; 4474 const PetscInt *indx; 4475 PetscViewer out; 4476 char *name; 4477 Mat B; 4478 const PetscScalar *values; 4479 4480 PetscFunctionBegin; 4481 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4482 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4483 /* Should this be the type of the diagonal block of A? 
*/ 4484 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4485 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4486 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4487 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4488 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4489 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4490 for (i=0; i<m; i++) { 4491 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4492 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4493 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4494 } 4495 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4496 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4497 4498 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4499 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4500 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4501 sprintf(name,"%s.%d",outfile,rank); 4502 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4503 ierr = PetscFree(name);CHKERRQ(ierr); 4504 ierr = MatView(B,out);CHKERRQ(ierr); 4505 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4506 ierr = MatDestroy(&B);CHKERRQ(ierr); 4507 PetscFunctionReturn(0); 4508 } 4509 4510 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4511 #undef __FUNCT__ 4512 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4513 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4514 { 4515 PetscErrorCode ierr; 4516 Mat_Merge_SeqsToMPI *merge; 4517 PetscContainer container; 4518 4519 PetscFunctionBegin; 4520 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4521 if (container) { 4522 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4523 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4524 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4525 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4526 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4527 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4528 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4529 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4530 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4531 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4532 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4533 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4534 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4535 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4536 ierr = PetscFree(merge);CHKERRQ(ierr); 4537 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4538 } 4539 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4540 PetscFunctionReturn(0); 4541 } 4542 4543 #include <../src/mat/utils/freespace.h> 4544 #include <petscbt.h> 4545 4546 #undef __FUNCT__ 4547 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4548 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4549 { 4550 PetscErrorCode ierr; 4551 MPI_Comm comm; 4552 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4553 PetscMPIInt size,rank,taga,*len_s; 4554 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4555 PetscInt proc,m; 4556 PetscInt **buf_ri,**buf_rj; 4557 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4558 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4559 MPI_Request *s_waits,*r_waits; 4560 MPI_Status *status; 4561 MatScalar *aa=a->a; 4562 MatScalar **abuf_r,*ba_i; 4563 Mat_Merge_SeqsToMPI *merge; 4564 PetscContainer container; 4565 4566 PetscFunctionBegin; 4567 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4568 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4569 4570 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4571 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4572 4573 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4574 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4575 4576 bi = merge->bi; 4577 bj = merge->bj; 4578 buf_ri = merge->buf_ri; 4579 buf_rj = merge->buf_rj; 4580 4581 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4582 owners = merge->rowmap->range; 4583 len_s = merge->len_s; 4584 4585 /* send and recv matrix values */ 4586 /*-----------------------------*/ 4587 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4588 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4589 4590 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4591 for (proc=0,k=0; proc<size; proc++) { 4592 if (!len_s[proc]) continue; 4593 i = owners[proc]; 4594 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4595 k++; 4596 } 4597 4598 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4599 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4600 ierr = PetscFree(status);CHKERRQ(ierr); 4601 4602 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4603 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4604 4605 /* insert mat values of mpimat */ 4606 /*----------------------------*/ 4607 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4608 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4609 4610 for (k=0; k<merge->nrecv; k++) { 4611 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4612 nrows = *(buf_ri_k[k]); 4613 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4614 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4615 } 4616 4617 /* set values of ba */ 4618 m = merge->rowmap->n; 4619 for (i=0; i<m; i++) { 4620 arow = owners[rank] + i; 4621 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4622 bnzi = bi[i+1] - bi[i]; 4623 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4624 4625 /* add local non-zero vals of this proc's seqmat into ba */ 4626 anzi = ai[arow+1] - ai[arow]; 4627 aj = a->j + ai[arow]; 4628 aa = a->a + ai[arow]; 4629 nextaj = 0; 4630 for (j=0; nextaj<anzi; j++) { 4631 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4632 ba_i[j] += aa[nextaj++]; 4633 } 4634 } 4635 4636 /* add received vals into ba */ 4637 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4638 /* i-th row */ 4639 if (i == *nextrow[k]) { 4640 anzi = *(nextai[k]+1) - *nextai[k]; 4641 aj = buf_rj[k] + *(nextai[k]); 4642 aa = abuf_r[k] + *(nextai[k]); 4643 nextaj = 0; 4644 for (j=0; nextaj<anzi; j++) { 4645 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4646 ba_i[j] += aa[nextaj++]; 4647 } 4648 } 4649 nextrow[k]++; nextai[k]++; 4650 } 4651 } 4652 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4653 } 4654 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4655 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4656 4657 ierr = 
PetscFree(abuf_r[0]);CHKERRQ(ierr); 4658 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4659 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4660 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4661 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4662 PetscFunctionReturn(0); 4663 } 4664 4665 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4666 4667 #undef __FUNCT__ 4668 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4669 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4670 { 4671 PetscErrorCode ierr; 4672 Mat B_mpi; 4673 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4674 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4675 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4676 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4677 PetscInt len,proc,*dnz,*onz,bs,cbs; 4678 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4679 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4680 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4681 MPI_Status *status; 4682 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4683 PetscBT lnkbt; 4684 Mat_Merge_SeqsToMPI *merge; 4685 PetscContainer container; 4686 4687 PetscFunctionBegin; 4688 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4689 4690 /* make sure it is a PETSc comm */ 4691 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4692 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4693 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4694 4695 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4696 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4697 4698 /* determine row ownership */ 4699 /*---------------------------------------------------------*/ 4700 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4701 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4702 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4703 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4704 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4705 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4706 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4707 4708 m = merge->rowmap->n; 4709 owners = merge->rowmap->range; 4710 4711 /* determine the number of messages to send, their lengths */ 4712 /*---------------------------------------------------------*/ 4713 len_s = merge->len_s; 4714 4715 len = 0; /* length of buf_si[] */ 4716 merge->nsend = 0; 4717 for (proc=0; proc<size; proc++) { 4718 len_si[proc] = 0; 4719 if (proc == rank) { 4720 len_s[proc] = 0; 4721 } else { 4722 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4723 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4724 } 4725 if (len_s[proc]) { 4726 merge->nsend++; 4727 nrows = 0; 4728 for (i=owners[proc]; i<owners[proc+1]; i++) { 4729 if (ai[i+1] > ai[i]) nrows++; 4730 } 4731 len_si[proc] = 2*(nrows+1); 4732 len += len_si[proc]; 4733 } 4734 } 4735 4736 /* determine the number and length of messages to receive for ij-structure */ 4737 /*-------------------------------------------------------------------------*/ 4738 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4739 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4740 4741 /* post the Irecv of j-structure */ 4742 
/*-------------------------------*/ 4743 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4744 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4745 4746 /* post the Isend of j-structure */ 4747 /*--------------------------------*/ 4748 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4749 4750 for (proc=0, k=0; proc<size; proc++) { 4751 if (!len_s[proc]) continue; 4752 i = owners[proc]; 4753 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4754 k++; 4755 } 4756 4757 /* receives and sends of j-structure are complete */ 4758 /*------------------------------------------------*/ 4759 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4760 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4761 4762 /* send and recv i-structure */ 4763 /*---------------------------*/ 4764 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4765 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4766 4767 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4768 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4769 for (proc=0,k=0; proc<size; proc++) { 4770 if (!len_s[proc]) continue; 4771 /* form outgoing message for i-structure: 4772 buf_si[0]: nrows to be sent 4773 [1:nrows]: row index (global) 4774 [nrows+1:2*nrows+1]: i-structure index 4775 */ 4776 /*-------------------------------------------*/ 4777 nrows = len_si[proc]/2 - 1; 4778 buf_si_i = buf_si + nrows+1; 4779 buf_si[0] = nrows; 4780 buf_si_i[0] = 0; 4781 nrows = 0; 4782 for (i=owners[proc]; i<owners[proc+1]; i++) { 4783 anzi = ai[i+1] - ai[i]; 4784 if (anzi) { 4785 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4786 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4787 nrows++; 4788 } 4789 } 4790 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4791 k++; 4792 buf_si += len_si[proc]; 4793 } 4794 4795 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4796 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4797 4798 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4799 for (i=0; i<merge->nrecv; i++) { 4800 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4801 } 4802 4803 ierr = PetscFree(len_si);CHKERRQ(ierr); 4804 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4805 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4806 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4807 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4808 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4809 ierr = PetscFree(status);CHKERRQ(ierr); 4810 4811 /* compute a local seq matrix in each processor */ 4812 /*----------------------------------------------*/ 4813 /* allocate bi array and free space for accumulating nonzero column info */ 4814 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4815 bi[0] = 0; 4816 4817 /* create and initialize a linked list */ 4818 nlnk = N+1; 4819 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4820 4821 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4822 len = ai[owners[rank+1]] - ai[owners[rank]]; 4823 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4824 4825 
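/*
     Note on the symbolic merge below: for each local row, the distinct column indices of
     this process's seqmat row and of every matching row received from other processes are
     accumulated in the sorted linked list lnk (PetscLLAddSorted), copied into the expandable
     free-space buffer (PetscLLClean), and their count is passed to MatPreallocateSet() to
     build the dnz/onz preallocation for the parallel matrix; the buffer is enlarged with
     PetscFreeSpaceGet() whenever the current chunk cannot hold the next row.
  */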
current_space = free_space; 4826 4827 /* determine symbolic info for each local row */ 4828 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4829 4830 for (k=0; k<merge->nrecv; k++) { 4831 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4832 nrows = *buf_ri_k[k]; 4833 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4834 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4835 } 4836 4837 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4838 len = 0; 4839 for (i=0; i<m; i++) { 4840 bnzi = 0; 4841 /* add local non-zero cols of this proc's seqmat into lnk */ 4842 arow = owners[rank] + i; 4843 anzi = ai[arow+1] - ai[arow]; 4844 aj = a->j + ai[arow]; 4845 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4846 bnzi += nlnk; 4847 /* add received col data into lnk */ 4848 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4849 if (i == *nextrow[k]) { /* i-th row */ 4850 anzi = *(nextai[k]+1) - *nextai[k]; 4851 aj = buf_rj[k] + *nextai[k]; 4852 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4853 bnzi += nlnk; 4854 nextrow[k]++; nextai[k]++; 4855 } 4856 } 4857 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4858 4859 /* if free space is not available, make more free space */ 4860 if (current_space->local_remaining<bnzi) { 4861 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4862 nspacedouble++; 4863 } 4864 /* copy data into free space, then initialize lnk */ 4865 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4866 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4867 4868 current_space->array += bnzi; 4869 current_space->local_used += bnzi; 4870 current_space->local_remaining -= bnzi; 4871 4872 bi[i+1] = bi[i] + bnzi; 4873 } 4874 4875 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4876 4877 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4878 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4879 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4880 4881 /* create symbolic parallel matrix B_mpi */ 4882 /*---------------------------------------*/ 4883 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4884 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4885 if (n==PETSC_DECIDE) { 4886 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4887 } else { 4888 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4889 } 4890 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4891 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4892 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4893 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4894 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4895 4896 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4897 B_mpi->assembled = PETSC_FALSE; 4898 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4899 merge->bi = bi; 4900 merge->bj = bj; 4901 merge->buf_ri = buf_ri; 4902 merge->buf_rj = buf_rj; 4903 merge->coi = NULL; 4904 merge->coj = NULL; 4905 merge->owners_co = NULL; 4906 4907 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4908 4909 /* attach the supporting struct to B_mpi for reuse */ 4910 ierr =
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4911 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4912 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4913 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4914 *mpimat = B_mpi; 4915 4916 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4917 PetscFunctionReturn(0); 4918 } 4919 4920 #undef __FUNCT__ 4921 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4922 /*@C 4923 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential 4924 matrices from each processor 4925 4926 Collective on MPI_Comm 4927 4928 Input Parameters: 4929 + comm - the communicator the parallel matrix will live on 4930 . seqmat - the input sequential matrix (one per process) 4931 . m - number of local rows (or PETSC_DECIDE) 4932 . n - number of local columns (or PETSC_DECIDE) 4933 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4934 4935 Output Parameter: 4936 . mpimat - the parallel matrix generated 4937 4938 Level: advanced 4939 4940 Notes: 4941 The dimensions of the sequential matrix in each processor MUST be the same. 4942 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4943 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4944 @*/ 4945 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4946 { 4947 PetscErrorCode ierr; 4948 PetscMPIInt size; 4949 4950 PetscFunctionBegin; 4951 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4952 if (size == 1) { 4953 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4954 if (scall == MAT_INITIAL_MATRIX) { 4955 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4956 } else { 4957 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4958 } 4959 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4960 PetscFunctionReturn(0); 4961 } 4962 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4963 if (scall == MAT_INITIAL_MATRIX) { 4964 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4965 } 4966 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4967 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4968 PetscFunctionReturn(0); 4969 } 4970 4971 #undef __FUNCT__ 4972 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4973 /*@ 4974 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4975 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4976 with MatGetSize() 4977 4978 Not Collective 4979 4980 Input Parameters: 4981 + A - the matrix 4982 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4983 4984 Output Parameter: 4985 .
A_loc - the local sequential matrix generated 4986 4987 Level: developer 4988 4989 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4990 4991 @*/ 4992 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4993 { 4994 PetscErrorCode ierr; 4995 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4996 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4997 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4998 MatScalar *aa=a->a,*ba=b->a,*cam; 4999 PetscScalar *ca; 5000 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5001 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5002 PetscBool match; 5003 5004 PetscFunctionBegin; 5005 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5006 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5007 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5008 if (scall == MAT_INITIAL_MATRIX) { 5009 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5010 ci[0] = 0; 5011 for (i=0; i<am; i++) { 5012 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5013 } 5014 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5015 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5016 k = 0; 5017 for (i=0; i<am; i++) { 5018 ncols_o = bi[i+1] - bi[i]; 5019 ncols_d = ai[i+1] - ai[i]; 5020 /* off-diagonal portion of A */ 5021 for (jo=0; jo<ncols_o; jo++) { 5022 col = cmap[*bj]; 5023 if (col >= cstart) break; 5024 cj[k] = col; bj++; 5025 ca[k++] = *ba++; 5026 } 5027 /* diagonal portion of A */ 5028 for (j=0; j<ncols_d; j++) { 5029 cj[k] = cstart + *aj++; 5030 ca[k++] = *aa++; 5031 } 5032 /* off-diagonal portion of A */ 5033 for (j=jo; j<ncols_o; j++) { 5034 cj[k] = cmap[*bj++]; 5035 ca[k++] = *ba++; 5036 } 5037 } 5038 /* put together the new matrix */ 5039 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5040 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5041 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5042 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5043 mat->free_a = PETSC_TRUE; 5044 mat->free_ij = PETSC_TRUE; 5045 mat->nonew = 0; 5046 } else if (scall == MAT_REUSE_MATRIX) { 5047 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5048 ci = mat->i; cj = mat->j; cam = mat->a; 5049 for (i=0; i<am; i++) { 5050 /* off-diagonal portion of A */ 5051 ncols_o = bi[i+1] - bi[i]; 5052 for (jo=0; jo<ncols_o; jo++) { 5053 col = cmap[*bj]; 5054 if (col >= cstart) break; 5055 *cam++ = *ba++; bj++; 5056 } 5057 /* diagonal portion of A */ 5058 ncols_d = ai[i+1] - ai[i]; 5059 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5060 /* off-diagonal portion of A */ 5061 for (j=jo; j<ncols_o; j++) { 5062 *cam++ = *ba++; bj++; 5063 } 5064 } 5065 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5066 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5067 PetscFunctionReturn(0); 5068 } 5069 5070 #undef __FUNCT__ 5071 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5072 /*@C 5073 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5074 5075 Not Collective 5076 5077 Input Parameters: 5078 + A - the matrix 5079 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5080 - row, col - index sets of rows and columns to extract (or NULL) 5081 5082 Output Parameter: 5083 . 
A_loc - the local sequential matrix generated 5084 5085 Level: developer 5086 5087 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5088 5089 @*/ 5090 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5091 { 5092 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5093 PetscErrorCode ierr; 5094 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5095 IS isrowa,iscola; 5096 Mat *aloc; 5097 PetscBool match; 5098 5099 PetscFunctionBegin; 5100 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5101 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5102 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5103 if (!row) { 5104 start = A->rmap->rstart; end = A->rmap->rend; 5105 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5106 } else { 5107 isrowa = *row; 5108 } 5109 if (!col) { 5110 start = A->cmap->rstart; 5111 cmap = a->garray; 5112 nzA = a->A->cmap->n; 5113 nzB = a->B->cmap->n; 5114 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5115 ncols = 0; 5116 for (i=0; i<nzB; i++) { 5117 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5118 else break; 5119 } 5120 imark = i; 5121 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5122 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5123 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5124 } else { 5125 iscola = *col; 5126 } 5127 if (scall != MAT_INITIAL_MATRIX) { 5128 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5129 aloc[0] = *A_loc; 5130 } 5131 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5132 *A_loc = aloc[0]; 5133 ierr = PetscFree(aloc);CHKERRQ(ierr); 5134 if (!row) { 5135 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5136 } 5137 if (!col) { 5138 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5139 } 5140 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5141 PetscFunctionReturn(0); 5142 } 5143 5144 #undef __FUNCT__ 5145 #define __FUNCT__ "MatGetBrowsOfAcols" 5146 /*@C 5147 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5148 5149 Collective on Mat 5150 5151 Input Parameters: 5152 + A,B - the matrices in mpiaij format 5153 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5154 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5155 5156 Output Parameter: 5157 + rowb, colb - index sets of rows and columns of B to extract 5158 - B_seq - the sequential matrix generated 5159 5160 Level: developer 5161 5162 @*/ 5163 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5164 { 5165 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5166 PetscErrorCode ierr; 5167 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5168 IS isrowb,iscolb; 5169 Mat *bseq=NULL; 5170 5171 PetscFunctionBegin; 5172 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5173 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5174 } 5175 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5176 5177 if (scall == MAT_INITIAL_MATRIX) { 5178 start = A->cmap->rstart; 5179 cmap = a->garray; 5180 nzA = a->A->cmap->n; 5181 nzB = a->B->cmap->n; 5182 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5183 ncols = 0; 5184 for (i=0; i<nzB; i++) { /* row < local row index */ 5185 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5186 else break; 5187 } 5188 imark = i; 5189 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5190 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5191 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5192 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5193 } else { 5194 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5195 isrowb = *rowb; iscolb = *colb; 5196 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5197 bseq[0] = *B_seq; 5198 } 5199 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5200 *B_seq = bseq[0]; 5201 ierr = PetscFree(bseq);CHKERRQ(ierr); 5202 if (!rowb) { 5203 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5204 } else { 5205 *rowb = isrowb; 5206 } 5207 if (!colb) { 5208 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5209 } else { 5210 *colb = iscolb; 5211 } 5212 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5213 PetscFunctionReturn(0); 5214 } 5215 5216 #undef __FUNCT__ 5217 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5218 /* 5219 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5220 of the OFF-DIAGONAL portion of local A 5221 5222 Collective on Mat 5223 5224 Input Parameters: 5225 + A,B - the matrices in mpiaij format 5226 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5227 5228 Output Parameter: 5229 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5230 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5231 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5232 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5233 5234 Level: developer 5235 5236 */ 5237 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5238 { 5239 VecScatter_MPI_General *gen_to,*gen_from; 5240 PetscErrorCode ierr; 5241 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5242 Mat_SeqAIJ *b_oth; 5243 VecScatter ctx =a->Mvctx; 5244 MPI_Comm comm; 5245 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5246 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5247 PetscScalar *rvalues,*svalues; 5248 MatScalar *b_otha,*bufa,*bufA; 5249 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5250 MPI_Request *rwaits = NULL,*swaits = NULL; 5251 MPI_Status *sstatus,rstatus; 5252 PetscMPIInt jj; 5253 PetscInt *cols,sbs,rbs; 5254 PetscScalar *vals; 5255 5256 PetscFunctionBegin; 5257 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5258 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5259 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5260 } 5261 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5262 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5263 5264 gen_to = (VecScatter_MPI_General*)ctx->todata; 5265 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5266 rvalues = gen_from->values; /* holds the length of receiving row */ 5267 svalues = gen_to->values; /* holds the length of sending row */ 5268 nrecvs = gen_from->n; 5269 nsends = gen_to->n; 5270 5271 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5272 srow = gen_to->indices; /* local row index to be sent */ 5273 sstarts = gen_to->starts; 5274 sprocs = gen_to->procs; 5275 sstatus = gen_to->sstatus; 5276 sbs = gen_to->bs; 5277 rstarts = gen_from->starts; 5278 rprocs = gen_from->procs; 5279 rbs = gen_from->bs; 5280 5281 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5282 if (scall == MAT_INITIAL_MATRIX) { 5283 /* i-array */ 5284 /*---------*/ 5285 /* post receives */ 5286 for (i=0; i<nrecvs; i++) { 5287 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5288 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5289 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5290 } 5291 5292 /* pack the outgoing message */ 5293 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5294 5295 sstartsj[0] = 0; 5296 rstartsj[0] = 0; 5297 len = 0; /* total length of j or a array to be sent */ 5298 k = 0; 5299 for (i=0; i<nsends; i++) { 5300 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5301 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5302 for (j=0; j<nrows; j++) { 5303 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5304 for (l=0; l<sbs; l++) { 5305 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5306 5307 rowlen[j*sbs+l] = ncols; 5308 5309 len += ncols; 5310 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5311 } 5312 k++; 5313 } 5314 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5315 5316 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in 
bufj and bufa */ 5317 } 5318 /* recvs and sends of i-array are completed */ 5319 i = nrecvs; 5320 while (i--) { 5321 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5322 } 5323 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5324 5325 /* allocate buffers for sending j and a arrays */ 5326 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5327 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5328 5329 /* create i-array of B_oth */ 5330 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5331 5332 b_othi[0] = 0; 5333 len = 0; /* total length of j or a array to be received */ 5334 k = 0; 5335 for (i=0; i<nrecvs; i++) { 5336 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5337 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5338 for (j=0; j<nrows; j++) { 5339 b_othi[k+1] = b_othi[k] + rowlen[j]; 5340 len += rowlen[j]; k++; 5341 } 5342 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5343 } 5344 5345 /* allocate space for j and a arrays of B_oth */ 5346 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5347 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5348 5349 /* j-array */ 5350 /*---------*/ 5351 /* post receives of j-array */ 5352 for (i=0; i<nrecvs; i++) { 5353 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5354 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5355 } 5356 5357 /* pack the outgoing message j-array */ 5358 k = 0; 5359 for (i=0; i<nsends; i++) { 5360 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5361 bufJ = bufj+sstartsj[i]; 5362 for (j=0; j<nrows; j++) { 5363 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5364 for (ll=0; ll<sbs; ll++) { 5365 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5366 for (l=0; l<ncols; l++) { 5367 *bufJ++ = cols[l]; 5368 } 5369 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5370 } 5371 } 5372 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5373 } 5374 5375 /* recvs and sends of j-array are completed */ 5376 i = nrecvs; 5377 while (i--) { 5378 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5379 } 5380 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5381 } else if (scall == MAT_REUSE_MATRIX) { 5382 sstartsj = *startsj_s; 5383 rstartsj = *startsj_r; 5384 bufa = *bufa_ptr; 5385 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5386 b_otha = b_oth->a; 5387 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5388 5389 /* a-array */ 5390 /*---------*/ 5391 /* post receives of a-array */ 5392 for (i=0; i<nrecvs; i++) { 5393 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5394 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5395 } 5396 5397 /* pack the outgoing message a-array */ 5398 k = 0; 5399 for (i=0; i<nsends; i++) { 5400 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5401 bufA = bufa+sstartsj[i]; 5402 for (j=0; j<nrows; j++) { 5403 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5404 for (ll=0; ll<sbs; ll++) { 5405 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5406 for (l=0; l<ncols; l++) { 5407 *bufA++ = vals[l]; 5408 } 5409 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5410 } 5411 } 5412 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5413 } 5414 /* recvs and sends of a-array are completed */ 5415 i = nrecvs; 5416 while (i--) { 5417 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5418 } 5419 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5420 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5421 5422 if (scall == MAT_INITIAL_MATRIX) { 5423 /* put together the new matrix */ 5424 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5425 5426 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5427 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5428 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5429 b_oth->free_a = PETSC_TRUE; 5430 b_oth->free_ij = PETSC_TRUE; 5431 b_oth->nonew = 0; 5432 5433 ierr = PetscFree(bufj);CHKERRQ(ierr); 5434 if (!startsj_s || !bufa_ptr) { 5435 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5436 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5437 } else { 5438 *startsj_s = sstartsj; 5439 *startsj_r = rstartsj; 5440 *bufa_ptr = bufa; 5441 } 5442 } 5443 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5444 PetscFunctionReturn(0); 5445 } 5446 5447 #undef __FUNCT__ 5448 #define __FUNCT__ "MatGetCommunicationStructs" 5449 /*@C 5450 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5451 5452 Not Collective 5453 5454 Input Parameters: 5455 . A - The matrix in mpiaij format 5456 5457 Output Parameter: 5458 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5459 . 
colmap - A map from global column index to local index into lvec 5460 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5461 5462 Level: developer 5463 5464 @*/ 5465 #if defined(PETSC_USE_CTABLE) 5466 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5467 #else 5468 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5469 #endif 5470 { 5471 Mat_MPIAIJ *a; 5472 5473 PetscFunctionBegin; 5474 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5475 PetscValidPointer(lvec, 2); 5476 PetscValidPointer(colmap, 3); 5477 PetscValidPointer(multScatter, 4); 5478 a = (Mat_MPIAIJ*) A->data; 5479 if (lvec) *lvec = a->lvec; 5480 if (colmap) *colmap = a->colmap; 5481 if (multScatter) *multScatter = a->Mvctx; 5482 PetscFunctionReturn(0); 5483 } 5484 5485 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5486 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5487 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5488 5489 #undef __FUNCT__ 5490 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5491 /* 5492 Computes (B'*A')' since computing B*A directly is untenable 5493 5494 n p p 5495 ( ) ( ) ( ) 5496 m ( A ) * n ( B ) = m ( C ) 5497 ( ) ( ) ( ) 5498 5499 */ 5500 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5501 { 5502 PetscErrorCode ierr; 5503 Mat At,Bt,Ct; 5504 5505 PetscFunctionBegin; 5506 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5507 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5508 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5509 ierr = MatDestroy(&At);CHKERRQ(ierr); 5510 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5511 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5512 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5513 PetscFunctionReturn(0); 5514 } 5515 5516 #undef __FUNCT__ 5517 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5518 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5519 { 5520 PetscErrorCode ierr; 5521 PetscInt m=A->rmap->n,n=B->cmap->n; 5522 Mat Cmat; 5523 5524 PetscFunctionBegin; 5525 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5526 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5527 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5528 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5529 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5530 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5531 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5532 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5533 5534 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5535 5536 *C = Cmat; 5537 PetscFunctionReturn(0); 5538 } 5539 5540 /* ----------------------------------------------------------------*/ 5541 #undef __FUNCT__ 5542 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5543 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5544 { 5545 PetscErrorCode ierr; 5546 5547 PetscFunctionBegin; 5548 if (scall == MAT_INITIAL_MATRIX) { 5549 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5550 } 5551 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5552 
PetscFunctionReturn(0); 5553 } 5554 5555 #if defined(PETSC_HAVE_MUMPS) 5556 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5557 #endif 5558 #if defined(PETSC_HAVE_PASTIX) 5559 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5560 #endif 5561 #if defined(PETSC_HAVE_SUPERLU_DIST) 5562 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5563 #endif 5564 #if defined(PETSC_HAVE_CLIQUE) 5565 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5566 #endif 5567 5568 /*MC 5569 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5570 5571 Options Database Keys: 5572 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5573 5574 Level: beginner 5575 5576 .seealso: MatCreateAIJ() 5577 M*/ 5578 5579 #undef __FUNCT__ 5580 #define __FUNCT__ "MatCreate_MPIAIJ" 5581 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5582 { 5583 Mat_MPIAIJ *b; 5584 PetscErrorCode ierr; 5585 PetscMPIInt size; 5586 5587 PetscFunctionBegin; 5588 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5589 5590 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5591 B->data = (void*)b; 5592 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5593 B->assembled = PETSC_FALSE; 5594 B->insertmode = NOT_SET_VALUES; 5595 b->size = size; 5596 5597 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5598 5599 /* build cache for off array entries formed */ 5600 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5601 5602 b->donotstash = PETSC_FALSE; 5603 b->colmap = 0; 5604 b->garray = 0; 5605 b->roworiented = PETSC_TRUE; 5606 5607 /* stuff used for matrix vector multiply */ 5608 b->lvec = NULL; 5609 b->Mvctx = NULL; 5610 5611 /* stuff for MatGetRow() */ 5612 b->rowindices = 0; 5613 b->rowvalues = 0; 5614 b->getrowactive = PETSC_FALSE; 5615 5616 /* flexible pointer used in CUSP/CUSPARSE classes */ 5617 b->spptr = NULL; 5618 5619 #if defined(PETSC_HAVE_MUMPS) 5620 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C","MatGetFactor_aij_mumps",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5621 #endif 5622 #if defined(PETSC_HAVE_PASTIX) 5623 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C","MatGetFactor_mpiaij_pastix",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5624 #endif 5625 #if defined(PETSC_HAVE_SUPERLU_DIST) 5626 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C","MatGetFactor_mpiaij_superlu_dist",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5627 #endif 5628 #if defined(PETSC_HAVE_CLIQUE) 5629 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C","MatGetFactor_aij_clique",MatGetFactor_aij_clique);CHKERRQ(ierr); 5630 #endif 5631 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIAIJ",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5632 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIAIJ",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5633 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIAIJ",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5634 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_MPIAIJ",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5635 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C","MatMPIAIJSetPreallocation_MPIAIJ",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5636 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C","MatMPIAIJSetPreallocationCSR_MPIAIJ",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5637 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIAIJ",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5638 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C","MatConvert_MPIAIJ_MPIAIJPERM",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5639 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C","MatConvert_MPIAIJ_MPIAIJCRL",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5640 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C","MatConvert_MPIAIJ_MPISBAIJ",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5641 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C","MatMatMult_MPIDense_MPIAIJ",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C","MatMatMultSymbolic_MPIDense_MPIAIJ",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5643 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C","MatMatMultNumeric_MPIDense_MPIAIJ",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5644 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5645 PetscFunctionReturn(0); 5646 } 5647 5648 #undef __FUNCT__ 5649 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5650 /*@ 5651 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5652 and "off-diagonal" part of the matrix in CSR format. 5653 5654 Collective on MPI_Comm 5655 5656 Input Parameters: 5657 + comm - MPI communicator 5658 . m - number of local rows (Cannot be PETSC_DECIDE) 5659 . n - This value should be the same as the local size used in creating the 5660 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5661 calculated if N is given) For square matrices n is almost always m. 5662 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5663 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5664 . i - row indices for "diagonal" portion of matrix 5665 . j - column indices 5666 . a - matrix values 5667 . oi - row indices for "off-diagonal" portion of matrix 5668 . oj - column indices 5669 - oa - matrix values 5670 5671 Output Parameter: 5672 . mat - the matrix 5673 5674 Level: advanced 5675 5676 Notes: 5677 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5678 must free the arrays once the matrix has been destroyed and not before. 5679 5680 The i and j indices are 0 based 5681 5682 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5683 5684 This sets local rows and cannot be used to set off-processor values. 5685 5686 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5687 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5688 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 5689 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5690 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5691 communication if it is known that only local entries will be set. 5692 5693 .keywords: matrix, aij, compressed row, sparse, parallel 5694 5695 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5696 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5697 @*/ 5698 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5699 { 5700 PetscErrorCode ierr; 5701 Mat_MPIAIJ *maij; 5702 5703 PetscFunctionBegin; 5704 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5705 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5706 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5707 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5708 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5709 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5710 maij = (Mat_MPIAIJ*) (*mat)->data; 5711 5712 (*mat)->preallocated = PETSC_TRUE; 5713 5714 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5715 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5716 5717 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5718 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5719 5720 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5721 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5722 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5723 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5724 5725 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5726 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5727 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5728 PetscFunctionReturn(0); 5729 } 5730 5731 /* 5732 Special version for direct calls from Fortran 5733 */ 5734 #include <petsc-private/fortranimpl.h> 5735 5736 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5737 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5738 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5739 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5740 #endif 5741 5742 /* Change these macros so can be used in void function */ 5743 #undef CHKERRQ 5744 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5745 #undef SETERRQ2 5746 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5747 #undef SETERRQ3 5748 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5749 #undef SETERRQ 5750 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5751 5752 #undef __FUNCT__ 5753 #define __FUNCT__ "matsetvaluesmpiaij_" 5754 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5755 { 5756 Mat mat = *mmat; 5757 PetscInt m = *mm, n = *mn; 5758 InsertMode addv = *maddv; 5759 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5760 PetscScalar value; 5761 PetscErrorCode ierr; 
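/*
     This routine is essentially an inlined copy of MatSetValues_MPIAIJ() so that Fortran code can
     call it directly without going through the usual wrappers: entries for locally owned rows go
     straight into the diagonal (aij->A) or off-diagonal (aij->B) SeqAIJ blocks via the
     MatSetValues_SeqAIJ_*_Private() macros, while rows owned by other processes are stashed for
     communication at assembly time (unless donotstash is set). Because the function returns void,
     the error macros above were redefined to abort instead of returning an error code.
  */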
5762 5763 MatCheckPreallocated(mat,1); 5764 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5765 5766 #if defined(PETSC_USE_DEBUG) 5767 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5768 #endif 5769 { 5770 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5771 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5772 PetscBool roworiented = aij->roworiented; 5773 5774 /* Some Variables required in the macro */ 5775 Mat A = aij->A; 5776 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5777 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5778 MatScalar *aa = a->a; 5779 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5780 Mat B = aij->B; 5781 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5782 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5783 MatScalar *ba = b->a; 5784 5785 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5786 PetscInt nonew = a->nonew; 5787 MatScalar *ap1,*ap2; 5788 5789 PetscFunctionBegin; 5790 for (i=0; i<m; i++) { 5791 if (im[i] < 0) continue; 5792 #if defined(PETSC_USE_DEBUG) 5793 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5794 #endif 5795 if (im[i] >= rstart && im[i] < rend) { 5796 row = im[i] - rstart; 5797 lastcol1 = -1; 5798 rp1 = aj + ai[row]; 5799 ap1 = aa + ai[row]; 5800 rmax1 = aimax[row]; 5801 nrow1 = ailen[row]; 5802 low1 = 0; 5803 high1 = nrow1; 5804 lastcol2 = -1; 5805 rp2 = bj + bi[row]; 5806 ap2 = ba + bi[row]; 5807 rmax2 = bimax[row]; 5808 nrow2 = bilen[row]; 5809 low2 = 0; 5810 high2 = nrow2; 5811 5812 for (j=0; j<n; j++) { 5813 if (roworiented) value = v[i*n+j]; 5814 else value = v[i+j*m]; 5815 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5816 if (in[j] >= cstart && in[j] < cend) { 5817 col = in[j] - cstart; 5818 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5819 } else if (in[j] < 0) continue; 5820 #if defined(PETSC_USE_DEBUG) 5821 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5822 #endif 5823 else { 5824 if (mat->was_assembled) { 5825 if (!aij->colmap) { 5826 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5827 } 5828 #if defined(PETSC_USE_CTABLE) 5829 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5830 col--; 5831 #else 5832 col = aij->colmap[in[j]] - 1; 5833 #endif 5834 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5835 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5836 col = in[j]; 5837 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5838 B = aij->B; 5839 b = (Mat_SeqAIJ*)B->data; 5840 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5841 rp2 = bj + bi[row]; 5842 ap2 = ba + bi[row]; 5843 rmax2 = bimax[row]; 5844 nrow2 = bilen[row]; 5845 low2 = 0; 5846 high2 = nrow2; 5847 bm = aij->B->rmap->n; 5848 ba = b->a; 5849 } 5850 } else col = in[j]; 5851 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5852 } 5853 } 5854 } else if (!aij->donotstash) { 5855 if (roworiented) { 5856 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5857 } else { 5858 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5859 } 5860 } 5861 } 5862 } 5863 PetscFunctionReturnVoid(); 5864 } 5865 5866
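/*
   Illustrative usage sketch (not part of the library): the manual pages above, in particular the
   notes for MatCreateMPIAIJWithSplitArrays(), recommend assembling parallel AIJ matrices with
   MatCreateAIJ() followed by MatSetValues(). A minimal example of that pattern, using a made-up
   1D Laplacian stencil of global size N and conservative per-row preallocation, looks like:

      Mat         A;
      PetscInt    i,rstart,rend,N = 100,ncols,cols[3];
      PetscScalar vals[3];

      ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,3,NULL,2,NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (i=rstart; i<rend; i++) {
        ncols = 0;
        if (i > 0)   {cols[ncols] = i-1; vals[ncols++] = -1.0;}
        cols[ncols] = i; vals[ncols++] = 2.0;
        if (i < N-1) {cols[ncols] = i+1; vals[ncols++] = -1.0;}
        ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/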