1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 138 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 139 if (type == NORM_2) { 140 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 141 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 142 } 143 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 144 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 145 } 146 } else if (type == NORM_1) { 147 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 148 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 149 } 150 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 151 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 152 } 153 } else if (type == NORM_INFINITY) { 154 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 155 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 156 } 157 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 158 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 159 } 160 161 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 162 if (type == NORM_INFINITY) { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 164 } else { 165 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 166 } 167 ierr = PetscFree(work);CHKERRQ(ierr); 168 if (type == NORM_2) { 169 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "MatDistribute_MPIAIJ" 176 /* 177 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 178 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 179 180 Only for square matrices 181 182 Used by a preconditioner, hence PETSC_EXTERN 183 */ 184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 185 { 186 PetscMPIInt rank,size; 187 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 188 PetscErrorCode ierr; 189 Mat mat; 190 Mat_SeqAIJ *gmata; 191 PetscMPIInt tag; 192 MPI_Status status; 193 PetscBool aij; 194 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 195 196 PetscFunctionBegin; 197 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 199 if (!rank) { 200 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 201 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 202 } 203 if (reuse == MAT_INITIAL_MATRIX) { 204 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 205 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 206 if (!rank) { 207 bses[0] = gmat->rmap->bs; 208 bses[1] = gmat->cmap->bs; 209 } 210 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 211 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 212 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 213 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 214 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 215 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 216 217 rowners[0] = 0; 218 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 219 rstart = rowners[rank]; 220 rend = rowners[rank+1]; 221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 222 if (!rank) { 223 gmata = (Mat_SeqAIJ*) gmat->data; 224 /* send row lengths to all processors */ 225 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 226 for (i=1; i<size; i++) { 227 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 228 } 229 /* determine number diagonal and off-diagonal counts */ 230 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 231 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 232 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 233 jj = 0; 234 for (i=0; i<m; i++) { 235 for (j=0; j<dlens[i]; j++) { 236 if (gmata->j[jj] < rstart) ld[i]++; 237 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 238 jj++; 239 } 240 } 241 /* send column indices to other processes */ 242 for (i=1; i<size; i++) { 243 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 244 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 246 } 247 248 /* send numerical values to other processes */ 249 for (i=1; i<size; i++) { 250 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 251 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 252 } 253 gmataa = gmata->a; 254 gmataj = gmata->j; 255 256 } else { 257 /* receive row lengths */ 258 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 /* receive column indices */ 260 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 262 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 263 /* determine number diagonal and off-diagonal counts */ 264 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 266 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 267 jj = 0; 268 for (i=0; i<m; i++) { 269 for (j=0; j<dlens[i]; j++) { 270 if (gmataj[jj] < rstart) ld[i]++; 271 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 272 jj++; 273 } 274 } 275 /* receive numerical values */ 276 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 277 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 278 } 279 /* set preallocation */ 280 for (i=0; i<m; i++) { 281 dlens[i] -= olens[i]; 282 } 283 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 284 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 285 286 for (i=0; i<m; i++) { 287 dlens[i] += olens[i]; 288 } 289 cnt = 0; 290 for (i=0; i<m; i++) { 291 row = rstart + i; 292 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 293 cnt += dlens[i]; 294 } 295 if (rank) { 296 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 297 } 298 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 299 ierr = PetscFree(rowners);CHKERRQ(ierr); 300 301 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 302 303 *inmat = mat; 304 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 307 mat = *inmat; 308 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 309 if (!rank) { 310 /* send numerical values to other processes */ 311 gmata = (Mat_SeqAIJ*) gmat->data; 312 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 313 gmataa = gmata->a; 314 for (i=1; i<size; i++) { 315 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 316 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 317 } 318 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 319 } else { 320 /* receive numerical values from process 0*/ 321 nz = Ad->nz + Ao->nz; 322 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 326 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 327 ad = Ad->a; 328 ao = Ao->a; 329 if (mat->rmap->n) { 330 i = 0; 331 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 for (i=1; i<mat->rmap->n; i++) { 335 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 336 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 337 } 338 i--; 339 if (mat->rmap->n) { 340 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 341 } 342 if (rank) { 343 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 344 } 345 } 346 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 /* 352 Local utility routine that creates a mapping from the global column 353 number to the local number in the off-diagonal part of the local 354 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 355 a slightly higher hash table cost; without it it is not scalable (each processor 356 has an order N integer array but is fast to acess. 357 */ 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 361 { 362 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 363 PetscErrorCode ierr; 364 PetscInt n = aij->B->cmap->n,i; 365 366 PetscFunctionBegin; 367 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 368 #if defined(PETSC_USE_CTABLE) 369 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 370 for (i=0; i<n; i++) { 371 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 372 } 373 #else 374 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 375 ierr = PetscLogObjectMemory((PetscObject)mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 376 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 377 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 378 #endif 379 PetscFunctionReturn(0); 380 } 381 382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 383 { \ 384 if (col <= lastcol1) low1 = 0; \ 385 else high1 = nrow1; \ 386 lastcol1 = col;\ 387 while (high1-low1 > 5) { \ 388 t = (low1+high1)/2; \ 389 if (rp1[t] > col) high1 = t; \ 390 else low1 = t; \ 391 } \ 392 for (_i=low1; _i<high1; _i++) { \ 393 if (rp1[_i] > col) break; \ 394 if (rp1[_i] == col) { \ 395 if (addv == ADD_VALUES) ap1[_i] += value; \ 396 else ap1[_i] = value; \ 397 goto a_noinsert; \ 398 } \ 399 } \ 400 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 401 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 402 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 403 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 404 N = nrow1++ - 1; a->nz++; high1++; \ 405 /* shift up all the later entries in this row */ \ 406 for (ii=N; ii>=_i; ii--) { \ 407 rp1[ii+1] = rp1[ii]; \ 408 ap1[ii+1] = ap1[ii]; \ 409 } \ 410 rp1[_i] = col; \ 411 ap1[_i] = value; \ 412 a_noinsert: ; \ 413 ailen[row] = nrow1; \ 414 } 415 416 417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 418 { \ 419 if (col <= lastcol2) low2 = 0; \ 420 else high2 = nrow2; \ 421 lastcol2 = col; \ 422 while (high2-low2 > 5) { \ 423 t = (low2+high2)/2; \ 424 if (rp2[t] > col) high2 = t; \ 425 else low2 = t; \ 426 } \ 427 for (_i=low2; _i<high2; _i++) { \ 428 if (rp2[_i] > col) break; \ 429 if (rp2[_i] == col) { \ 430 if (addv == ADD_VALUES) ap2[_i] += value; \ 431 else ap2[_i] = value; \ 432 goto b_noinsert; \ 433 } \ 434 } \ 435 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 436 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 437 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 438 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 439 N = nrow2++ - 1; b->nz++; high2++; \ 440 /* shift up all the later entries in this row */ \ 441 for (ii=N; ii>=_i; ii--) { \ 442 rp2[ii+1] = rp2[ii]; \ 443 ap2[ii+1] = ap2[ii]; \ 444 } \ 445 rp2[_i] = col; \ 446 ap2[_i] = value; \ 447 b_noinsert: ; \ 448 bilen[row] = nrow2; \ 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 454 { 455 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 456 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 457 PetscErrorCode ierr; 458 PetscInt l,*garray = mat->garray,diag; 459 460 PetscFunctionBegin; 461 /* code only works for square matrices A */ 462 463 /* find size of row to the left of the diagonal part */ 464 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 465 row = row - diag; 466 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 467 if (garray[b->j[b->i[row]+l]] > diag) break; 468 } 469 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 470 471 /* diagonal part */ 472 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 473 474 /* right of diagonal part */ 475 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatSetValues_MPIAIJ" 481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 PetscScalar value; 485 PetscErrorCode ierr; 486 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 487 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 488 PetscBool roworiented = aij->roworiented; 489 490 /* Some Variables required in the macro */ 491 Mat A = aij->A; 492 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 493 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 494 MatScalar *aa = a->a; 495 PetscBool ignorezeroentries = a->ignorezeroentries; 496 Mat B = aij->B; 497 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 498 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 499 MatScalar *ba = b->a; 500 501 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 502 PetscInt nonew; 503 MatScalar *ap1,*ap2; 504 505 PetscFunctionBegin; 506 if (v) PetscValidScalarPointer(v,6); 507 for (i=0; i<m; i++) { 508 if (im[i] < 0) continue; 509 #if defined(PETSC_USE_DEBUG) 510 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 511 #endif 512 if (im[i] >= rstart && im[i] < rend) { 513 row = im[i] - rstart; 514 lastcol1 = -1; 515 rp1 = aj + ai[row]; 516 ap1 = aa + ai[row]; 517 rmax1 = aimax[row]; 518 nrow1 = ailen[row]; 519 low1 = 0; 520 high1 = nrow1; 521 lastcol2 = -1; 522 rp2 = bj + bi[row]; 523 ap2 = ba + bi[row]; 524 rmax2 = bimax[row]; 525 nrow2 = bilen[row]; 526 low2 = 0; 527 high2 = nrow2; 528 529 for (j=0; j<n; j++) { 530 if (v) { 531 if (roworiented) value = v[i*n+j]; 532 else value = v[i+j*m]; 533 } else value = 0.0; 534 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 535 if (in[j] >= cstart && in[j] < cend) { 536 col = in[j] - cstart; 537 nonew = a->nonew; 538 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 539 } else if (in[j] < 0) continue; 540 #if defined(PETSC_USE_DEBUG) 541 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 542 #endif 543 else { 544 if (mat->was_assembled) { 545 if (!aij->colmap) { 546 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 547 } 548 #if defined(PETSC_USE_CTABLE) 549 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 550 col--; 551 #else 552 col = aij->colmap[in[j]] - 1; 553 #endif 554 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 555 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 556 col = in[j]; 557 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 558 B = aij->B; 559 b = (Mat_SeqAIJ*)B->data; 560 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 561 rp2 = bj + bi[row]; 562 ap2 = ba + bi[row]; 563 rmax2 = bimax[row]; 564 nrow2 = bilen[row]; 565 low2 = 0; 566 high2 = nrow2; 567 bm = aij->B->rmap->n; 568 ba = b->a; 569 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 570 } else col = in[j]; 571 nonew = b->nonew; 572 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 573 } 574 } 575 } else { 576 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 577 if (!aij->donotstash) { 578 mat->assembled = PETSC_FALSE; 579 if (roworiented) { 580 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 581 } else { 582 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 583 } 584 } 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatGetValues_MPIAIJ" 592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 593 { 594 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 595 PetscErrorCode ierr; 596 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 597 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 598 599 PetscFunctionBegin; 600 for (i=0; i<m; i++) { 601 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 602 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 603 if (idxm[i] >= rstart && idxm[i] < rend) { 604 row = idxm[i] - rstart; 605 for (j=0; j<n; j++) { 606 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 607 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 608 if (idxn[j] >= cstart && idxn[j] < cend) { 609 col = idxn[j] - cstart; 610 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 611 } else { 612 if (!aij->colmap) { 613 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 614 } 615 #if defined(PETSC_USE_CTABLE) 616 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 617 col--; 618 #else 619 col = aij->colmap[idxn[j]] - 1; 620 #endif 621 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 622 else { 623 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 624 } 625 } 626 } 627 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 628 } 629 PetscFunctionReturn(0); 630 } 631 632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt nstash,reallocs; 641 InsertMode addv; 642 643 PetscFunctionBegin; 644 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 645 646 /* make sure all processors are either in INSERTMODE or ADDMODE */ 647 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 648 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 649 mat->insertmode = addv; /* in case this processor had no cache */ 650 651 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 652 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 653 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 660 { 661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 663 PetscErrorCode ierr; 664 PetscMPIInt n; 665 PetscInt i,j,rstart,ncols,flg; 666 PetscInt *row,*col; 667 PetscBool other_disassembled; 668 PetscScalar *val; 669 InsertMode addv = mat->insertmode; 670 671 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 672 673 PetscFunctionBegin; 674 if (!aij->donotstash && !mat->nooffprocentries) { 675 while (1) { 676 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 677 if (!flg) break; 678 679 for (i=0; i<n; ) { 680 /* Now identify the consecutive vals belonging to the same row */ 681 for (j=i,rstart=row[j]; j<n; j++) { 682 if (row[j] != rstart) break; 683 } 684 if (j < n) ncols = j-i; 685 else ncols = n-i; 686 /* Now assemble all these values with a single function call */ 687 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 688 689 i = j; 690 } 691 } 692 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 693 } 694 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 695 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 696 697 /* determine if any processor has disassembled, if so we must 698 also disassemble ourselfs, in order that we may reassemble. */ 699 /* 700 if nonzero structure of submatrix B cannot change then we know that 701 no processor disassembled thus we can skip this stuff 702 */ 703 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 704 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 705 if (mat->was_assembled && !other_disassembled) { 706 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 } 709 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 710 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 711 } 712 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 713 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 714 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 715 716 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 717 718 aij->rowvalues = 0; 719 720 /* used by MatAXPY() */ 721 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 722 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 723 724 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 725 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 731 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 732 { 733 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 734 PetscErrorCode ierr; 735 736 PetscFunctionBegin; 737 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 738 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatZeroRows_MPIAIJ" 744 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 745 { 746 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 747 PetscInt *owners = A->rmap->range; 748 PetscInt n = A->rmap->n; 749 PetscMPIInt size = mat->size; 750 PetscSF sf; 751 PetscInt *lrows; 752 PetscSFNode *rrows; 753 PetscInt lastidx = -1, r, p = 0, len = 0; 754 PetscErrorCode ierr; 755 756 PetscFunctionBegin; 757 /* Create SF where leaves are input rows and roots are owned rows */ 758 ierr = PetscMalloc(n * sizeof(PetscInt), &lrows);CHKERRQ(ierr); 759 ierr = PetscMemzero(lrows, n * sizeof(PetscInt));CHKERRQ(ierr); 760 ierr = PetscMalloc(N * sizeof(PetscSFNode), &rrows);CHKERRQ(ierr); 761 for (r = 0; r < N; ++r) { 762 const PetscInt idx = rows[r]; 763 PetscBool found = PETSC_FALSE; 764 /* Trick for efficient searching for sorted rows */ 765 if (lastidx > idx) p = 0; 766 lastidx = idx; 767 for (; p < size; ++p) { 768 if (idx >= owners[p] && idx < owners[p+1]) { 769 rrows[r].rank = p; 770 rrows[r].index = rows[r] - owners[p]; 771 found = PETSC_TRUE; 772 break; 773 } 774 } 775 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 776 } 777 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 778 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 779 /* Collect flags for rows to be zeroed */ 780 ierr = PetscSFReduceBegin(sf, MPIU_INT, rows, lrows, MPI_LOR);CHKERRQ(ierr); 781 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 782 /* Compress and put in row numbers */ 783 for (r = 0; r < n; ++r) if (lrows[r]) lrows[len++] = r; 784 /* fix right hand side if needed */ 785 if (x && b) { 786 const PetscScalar *xx; 787 PetscScalar *bb; 788 789 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 790 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 791 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 792 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 793 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 794 } 795 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 796 ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr); 797 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 798 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 799 } else if (diag != 0.0) { 800 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 801 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 802 for (r = 0; r < len; ++r) { 803 const PetscInt row = lrows[r] + A->rmap->rstart; 804 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 805 } 806 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 807 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 808 } else { 809 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 810 } 811 ierr = PetscFree(lrows);CHKERRQ(ierr); 812 PetscFunctionReturn(0); 813 } 814 815 #undef __FUNCT__ 816 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 817 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 818 { 819 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 820 PetscErrorCode ierr; 821 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 822 PetscInt i,*owners = A->rmap->range; 823 PetscInt *nprocs,j,idx,nsends; 824 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 825 PetscInt *rvalues,count,base,slen,*source; 826 PetscInt *lens,*lrows,*values,m; 827 MPI_Comm comm; 828 MPI_Request *send_waits,*recv_waits; 829 MPI_Status recv_status,*send_status; 830 const PetscScalar *xx; 831 PetscScalar *bb,*mask; 832 Vec xmask,lmask; 833 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 834 const PetscInt *aj, *ii,*ridx; 835 PetscScalar *aa; 836 #if defined(PETSC_DEBUG) 837 PetscBool found = PETSC_FALSE; 838 #endif 839 840 PetscFunctionBegin; 841 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 842 /* first count number of contributors to each processor */ 843 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 844 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 845 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 846 j = 0; 847 for (i=0; i<N; i++) { 848 if (lastidx > (idx = rows[i])) j = 0; 849 lastidx = idx; 850 for (; j<size; j++) { 851 if (idx >= owners[j] && idx < owners[j+1]) { 852 nprocs[2*j]++; 853 nprocs[2*j+1] = 1; 854 owner[i] = j; 855 #if defined(PETSC_DEBUG) 856 found = PETSC_TRUE; 857 #endif 858 break; 859 } 860 } 861 #if defined(PETSC_DEBUG) 862 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 863 found = PETSC_FALSE; 864 #endif 865 } 866 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 867 868 /* inform other processors of number of messages and max length*/ 869 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 870 871 /* post receives: */ 872 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 873 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 874 for (i=0; i<nrecvs; i++) { 875 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 876 } 877 878 /* do sends: 879 1) starts[i] gives the starting index in svalues for stuff going to 880 the ith processor 881 */ 882 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 883 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 884 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 885 886 starts[0] = 0; 887 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 888 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 889 890 starts[0] = 0; 891 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 892 count = 0; 893 for (i=0; i<size; i++) { 894 if (nprocs[2*i+1]) { 895 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 896 } 897 } 898 ierr = PetscFree(starts);CHKERRQ(ierr); 899 900 base = owners[rank]; 901 902 /* wait on receives */ 903 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 904 count = nrecvs; slen = 0; 905 while (count) { 906 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 907 /* unpack receives into our local space */ 908 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 909 910 source[imdex] = recv_status.MPI_SOURCE; 911 lens[imdex] = n; 912 slen += n; 913 count--; 914 } 915 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 916 917 /* move the data into the send scatter */ 918 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 919 count = 0; 920 for (i=0; i<nrecvs; i++) { 921 values = rvalues + i*nmax; 922 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 923 } 924 ierr = PetscFree(rvalues);CHKERRQ(ierr); 925 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 926 ierr = PetscFree(owner);CHKERRQ(ierr); 927 ierr = PetscFree(nprocs);CHKERRQ(ierr); 928 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 929 930 /* zero diagonal part of matrix */ 931 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 932 933 /* handle off diagonal part of matrix */ 934 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 935 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 936 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 937 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 938 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 939 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 942 if (x) { 943 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 944 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 946 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 947 } 948 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 949 950 /* remove zeroed rows of off diagonal matrix */ 951 ii = aij->i; 952 for (i=0; i<slen; i++) { 953 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 954 } 955 956 /* loop over all elements of off process part of matrix zeroing removed columns*/ 957 if (aij->compressedrow.use) { 958 m = aij->compressedrow.nrows; 959 ii = aij->compressedrow.i; 960 ridx = aij->compressedrow.rindex; 961 for (i=0; i<m; i++) { 962 n = ii[i+1] - ii[i]; 963 aj = aij->j + ii[i]; 964 aa = aij->a + ii[i]; 965 966 for (j=0; j<n; j++) { 967 if (PetscAbsScalar(mask[*aj])) { 968 if (b) bb[*ridx] -= *aa*xx[*aj]; 969 *aa = 0.0; 970 } 971 aa++; 972 aj++; 973 } 974 ridx++; 975 } 976 } else { /* do not use compressed row format */ 977 m = l->B->rmap->n; 978 for (i=0; i<m; i++) { 979 n = ii[i+1] - ii[i]; 980 aj = aij->j + ii[i]; 981 aa = aij->a + ii[i]; 982 for (j=0; j<n; j++) { 983 if (PetscAbsScalar(mask[*aj])) { 984 if (b) bb[i] -= *aa*xx[*aj]; 985 *aa = 0.0; 986 } 987 aa++; 988 aj++; 989 } 990 } 991 } 992 if (x) { 993 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 994 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 995 } 996 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 997 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 998 ierr = PetscFree(lrows);CHKERRQ(ierr); 999 1000 /* wait on sends */ 1001 if (nsends) { 1002 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1003 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1004 ierr = PetscFree(send_status);CHKERRQ(ierr); 1005 } 1006 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1007 ierr = PetscFree(svalues);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 #undef __FUNCT__ 1012 #define __FUNCT__ "MatMult_MPIAIJ" 1013 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1014 { 1015 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1016 PetscErrorCode ierr; 1017 PetscInt nt; 1018 1019 PetscFunctionBegin; 1020 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1021 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1022 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1023 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1024 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1025 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1026 PetscFunctionReturn(0); 1027 } 1028 1029 #undef __FUNCT__ 1030 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1031 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1032 { 1033 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1034 PetscErrorCode ierr; 1035 1036 PetscFunctionBegin; 1037 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1038 PetscFunctionReturn(0); 1039 } 1040 1041 #undef __FUNCT__ 1042 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1043 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1044 { 1045 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1046 PetscErrorCode ierr; 1047 1048 PetscFunctionBegin; 1049 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1050 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1051 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1053 PetscFunctionReturn(0); 1054 } 1055 1056 #undef __FUNCT__ 1057 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1058 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1059 { 1060 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1061 PetscErrorCode ierr; 1062 PetscBool merged; 1063 1064 PetscFunctionBegin; 1065 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1066 /* do nondiagonal part */ 1067 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1068 if (!merged) { 1069 /* send it on its way */ 1070 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1071 /* do local part */ 1072 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1073 /* receive remote parts: note this assumes the values are not actually */ 1074 /* added in yy until the next line, */ 1075 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1076 } else { 1077 /* do local part */ 1078 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1079 /* send it on its way */ 1080 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1081 /* values actually were received in the Begin() but we need to call this nop */ 1082 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1089 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1090 { 1091 MPI_Comm comm; 1092 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1093 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1094 IS Me,Notme; 1095 PetscErrorCode ierr; 1096 PetscInt M,N,first,last,*notme,i; 1097 PetscMPIInt size; 1098 1099 PetscFunctionBegin; 1100 /* Easy test: symmetric diagonal block */ 1101 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1102 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1103 if (!*f) PetscFunctionReturn(0); 1104 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1105 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1106 if (size == 1) PetscFunctionReturn(0); 1107 1108 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1109 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1110 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1111 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1112 for (i=0; i<first; i++) notme[i] = i; 1113 for (i=last; i<M; i++) notme[i-last+first] = i; 1114 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1115 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1116 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1117 Aoff = Aoffs[0]; 1118 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1119 Boff = Boffs[0]; 1120 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1121 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1122 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1123 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1124 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1125 ierr = PetscFree(notme);CHKERRQ(ierr); 1126 PetscFunctionReturn(0); 1127 } 1128 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1131 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1132 { 1133 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1134 PetscErrorCode ierr; 1135 1136 PetscFunctionBegin; 1137 /* do nondiagonal part */ 1138 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1139 /* send it on its way */ 1140 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1141 /* do local part */ 1142 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1143 /* receive remote parts */ 1144 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 /* 1149 This only works correctly for square matrices where the subblock A->A is the 1150 diagonal block 1151 */ 1152 #undef __FUNCT__ 1153 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1154 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1155 { 1156 PetscErrorCode ierr; 1157 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1158 1159 PetscFunctionBegin; 1160 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1161 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1162 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1163 PetscFunctionReturn(0); 1164 } 1165 1166 #undef __FUNCT__ 1167 #define __FUNCT__ "MatScale_MPIAIJ" 1168 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1169 { 1170 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1171 PetscErrorCode ierr; 1172 1173 PetscFunctionBegin; 1174 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1175 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatDestroy_MPIAIJ" 1181 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1182 { 1183 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1184 PetscErrorCode ierr; 1185 1186 PetscFunctionBegin; 1187 #if defined(PETSC_USE_LOG) 1188 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1189 #endif 1190 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1191 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1192 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1193 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1194 #if defined(PETSC_USE_CTABLE) 1195 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1196 #else 1197 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1198 #endif 1199 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1200 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1201 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1202 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1203 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1204 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1205 1206 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1207 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1208 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1209 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1210 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1211 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1212 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1213 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 #undef __FUNCT__ 1219 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1220 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1221 { 1222 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1223 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1224 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1225 PetscErrorCode ierr; 1226 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1227 int fd; 1228 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1229 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1230 PetscScalar *column_values; 1231 PetscInt message_count,flowcontrolcount; 1232 FILE *file; 1233 1234 PetscFunctionBegin; 1235 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1236 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1237 nz = A->nz + B->nz; 1238 if (!rank) { 1239 header[0] = MAT_FILE_CLASSID; 1240 header[1] = mat->rmap->N; 1241 header[2] = mat->cmap->N; 1242 1243 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1244 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1245 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1246 /* get largest number of rows any processor has */ 1247 rlen = mat->rmap->n; 1248 range = mat->rmap->range; 1249 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1250 } else { 1251 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1252 rlen = mat->rmap->n; 1253 } 1254 1255 /* load up the local row counts */ 1256 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1257 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1258 1259 /* store the row lengths to the file */ 1260 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1261 if (!rank) { 1262 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1263 for (i=1; i<size; i++) { 1264 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1265 rlen = range[i+1] - range[i]; 1266 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1267 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1268 } 1269 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1270 } else { 1271 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1272 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1273 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1274 } 1275 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1276 1277 /* load up the local column indices */ 1278 nzmax = nz; /* th processor needs space a largest processor needs */ 1279 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1280 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1281 cnt = 0; 1282 for (i=0; i<mat->rmap->n; i++) { 1283 for (j=B->i[i]; j<B->i[i+1]; j++) { 1284 if ((col = garray[B->j[j]]) > cstart) break; 1285 column_indices[cnt++] = col; 1286 } 1287 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1288 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1289 } 1290 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1291 1292 /* store the column indices to the file */ 1293 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1294 if (!rank) { 1295 MPI_Status status; 1296 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1297 for (i=1; i<size; i++) { 1298 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1299 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1300 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1301 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1302 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1303 } 1304 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1305 } else { 1306 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1307 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1308 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1309 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1310 } 1311 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1312 1313 /* load up the local column values */ 1314 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1315 cnt = 0; 1316 for (i=0; i<mat->rmap->n; i++) { 1317 for (j=B->i[i]; j<B->i[i+1]; j++) { 1318 if (garray[B->j[j]] > cstart) break; 1319 column_values[cnt++] = B->a[j]; 1320 } 1321 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1322 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1323 } 1324 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1325 1326 /* store the column values to the file */ 1327 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1328 if (!rank) { 1329 MPI_Status status; 1330 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1331 for (i=1; i<size; i++) { 1332 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1333 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1334 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1335 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1336 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1337 } 1338 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1339 } else { 1340 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1341 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1342 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1343 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1344 } 1345 ierr = PetscFree(column_values);CHKERRQ(ierr); 1346 1347 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1348 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #include <petscdraw.h> 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1355 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1356 { 1357 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1358 PetscErrorCode ierr; 1359 PetscMPIInt rank = aij->rank,size = aij->size; 1360 PetscBool isdraw,iascii,isbinary; 1361 PetscViewer sviewer; 1362 PetscViewerFormat format; 1363 1364 PetscFunctionBegin; 1365 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1366 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1367 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1368 if (iascii) { 1369 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1370 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1371 MatInfo info; 1372 PetscBool inodes; 1373 1374 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1375 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1376 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1377 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1378 if (!inodes) { 1379 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1380 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1381 } else { 1382 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1383 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1384 } 1385 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1386 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1387 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1388 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1389 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1390 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1391 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1392 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1393 PetscFunctionReturn(0); 1394 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1395 PetscInt inodecount,inodelimit,*inodes; 1396 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1397 if (inodes) { 1398 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1399 } else { 1400 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1401 } 1402 PetscFunctionReturn(0); 1403 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1404 PetscFunctionReturn(0); 1405 } 1406 } else if (isbinary) { 1407 if (size == 1) { 1408 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1409 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1410 } else { 1411 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1412 } 1413 PetscFunctionReturn(0); 1414 } else if (isdraw) { 1415 PetscDraw draw; 1416 PetscBool isnull; 1417 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1418 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1419 } 1420 1421 if (size == 1) { 1422 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1423 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1424 } else { 1425 /* assemble the entire matrix onto first processor. */ 1426 Mat A; 1427 Mat_SeqAIJ *Aloc; 1428 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1429 MatScalar *a; 1430 1431 if (mat->rmap->N > 1024) { 1432 PetscBool flg = PETSC_FALSE; 1433 1434 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1435 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1436 } 1437 1438 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1439 if (!rank) { 1440 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1441 } else { 1442 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1443 } 1444 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1445 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1446 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1447 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1448 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1449 1450 /* copy over the A part */ 1451 Aloc = (Mat_SeqAIJ*)aij->A->data; 1452 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1453 row = mat->rmap->rstart; 1454 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1455 for (i=0; i<m; i++) { 1456 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1457 row++; 1458 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1459 } 1460 aj = Aloc->j; 1461 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1462 1463 /* copy over the B part */ 1464 Aloc = (Mat_SeqAIJ*)aij->B->data; 1465 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1466 row = mat->rmap->rstart; 1467 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1468 ct = cols; 1469 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1470 for (i=0; i<m; i++) { 1471 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1472 row++; 1473 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1474 } 1475 ierr = PetscFree(ct);CHKERRQ(ierr); 1476 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1477 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1478 /* 1479 Everyone has to call to draw the matrix since the graphics waits are 1480 synchronized across all processors that share the PetscDraw object 1481 */ 1482 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1483 if (!rank) { 1484 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1485 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1486 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1487 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1488 } 1489 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1490 ierr = MatDestroy(&A);CHKERRQ(ierr); 1491 } 1492 PetscFunctionReturn(0); 1493 } 1494 1495 #undef __FUNCT__ 1496 #define __FUNCT__ "MatView_MPIAIJ" 1497 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1498 { 1499 PetscErrorCode ierr; 1500 PetscBool iascii,isdraw,issocket,isbinary; 1501 1502 PetscFunctionBegin; 1503 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1504 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1505 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1506 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1507 if (iascii || isdraw || isbinary || issocket) { 1508 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1509 } 1510 PetscFunctionReturn(0); 1511 } 1512 1513 #undef __FUNCT__ 1514 #define __FUNCT__ "MatSOR_MPIAIJ" 1515 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1516 { 1517 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1518 PetscErrorCode ierr; 1519 Vec bb1 = 0; 1520 PetscBool hasop; 1521 1522 PetscFunctionBegin; 1523 if (flag == SOR_APPLY_UPPER) { 1524 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1525 PetscFunctionReturn(0); 1526 } 1527 1528 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1529 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1530 } 1531 1532 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1533 if (flag & SOR_ZERO_INITIAL_GUESS) { 1534 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1535 its--; 1536 } 1537 1538 while (its--) { 1539 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1540 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1541 1542 /* update rhs: bb1 = bb - B*x */ 1543 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1544 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1545 1546 /* local sweep */ 1547 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1548 } 1549 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1550 if (flag & SOR_ZERO_INITIAL_GUESS) { 1551 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1552 its--; 1553 } 1554 while (its--) { 1555 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1556 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1557 1558 /* update rhs: bb1 = bb - B*x */ 1559 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1560 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1561 1562 /* local sweep */ 1563 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1564 } 1565 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1566 if (flag & SOR_ZERO_INITIAL_GUESS) { 1567 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1568 its--; 1569 } 1570 while (its--) { 1571 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1572 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1573 1574 /* update rhs: bb1 = bb - B*x */ 1575 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1576 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1577 1578 /* local sweep */ 1579 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1580 } 1581 } else if (flag & SOR_EISENSTAT) { 1582 Vec xx1; 1583 1584 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1585 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1586 1587 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1588 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1589 if (!mat->diag) { 1590 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1591 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1592 } 1593 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1594 if (hasop) { 1595 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1596 } else { 1597 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1598 } 1599 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1600 1601 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1602 1603 /* local sweep */ 1604 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1605 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1606 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1607 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1608 1609 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1610 PetscFunctionReturn(0); 1611 } 1612 1613 #undef __FUNCT__ 1614 #define __FUNCT__ "MatPermute_MPIAIJ" 1615 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1616 { 1617 Mat aA,aB,Aperm; 1618 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1619 PetscScalar *aa,*ba; 1620 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1621 PetscSF rowsf,sf; 1622 IS parcolp = NULL; 1623 PetscBool done; 1624 PetscErrorCode ierr; 1625 1626 PetscFunctionBegin; 1627 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1628 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1629 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1630 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1631 1632 /* Invert row permutation to find out where my rows should go */ 1633 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1634 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1635 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1636 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1637 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1638 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1639 1640 /* Invert column permutation to find out where my columns should go */ 1641 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1642 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1643 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1644 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1645 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1646 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1647 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1648 1649 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1650 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1651 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1652 1653 /* Find out where my gcols should go */ 1654 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1655 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1656 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1657 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1658 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1659 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1660 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1661 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1662 1663 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1664 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1665 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1666 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1667 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1668 for (i=0; i<m; i++) { 1669 PetscInt row = rdest[i],rowner; 1670 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1671 for (j=ai[i]; j<ai[i+1]; j++) { 1672 PetscInt cowner,col = cdest[aj[j]]; 1673 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1674 if (rowner == cowner) dnnz[i]++; 1675 else onnz[i]++; 1676 } 1677 for (j=bi[i]; j<bi[i+1]; j++) { 1678 PetscInt cowner,col = gcdest[bj[j]]; 1679 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1680 if (rowner == cowner) dnnz[i]++; 1681 else onnz[i]++; 1682 } 1683 } 1684 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1685 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1686 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1687 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1688 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1689 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1690 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1691 1692 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1693 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1694 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1695 for (i=0; i<m; i++) { 1696 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1697 PetscInt rowlen; 1698 rowlen = ai[i+1] - ai[i]; 1699 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1700 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1701 rowlen = bi[i+1] - bi[i]; 1702 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1703 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1704 } 1705 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1706 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1707 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1708 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1709 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1710 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1711 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1712 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1713 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1714 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1715 *B = Aperm; 1716 PetscFunctionReturn(0); 1717 } 1718 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1721 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1722 { 1723 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1724 Mat A = mat->A,B = mat->B; 1725 PetscErrorCode ierr; 1726 PetscReal isend[5],irecv[5]; 1727 1728 PetscFunctionBegin; 1729 info->block_size = 1.0; 1730 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1731 1732 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1733 isend[3] = info->memory; isend[4] = info->mallocs; 1734 1735 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1736 1737 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1738 isend[3] += info->memory; isend[4] += info->mallocs; 1739 if (flag == MAT_LOCAL) { 1740 info->nz_used = isend[0]; 1741 info->nz_allocated = isend[1]; 1742 info->nz_unneeded = isend[2]; 1743 info->memory = isend[3]; 1744 info->mallocs = isend[4]; 1745 } else if (flag == MAT_GLOBAL_MAX) { 1746 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1747 1748 info->nz_used = irecv[0]; 1749 info->nz_allocated = irecv[1]; 1750 info->nz_unneeded = irecv[2]; 1751 info->memory = irecv[3]; 1752 info->mallocs = irecv[4]; 1753 } else if (flag == MAT_GLOBAL_SUM) { 1754 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1755 1756 info->nz_used = irecv[0]; 1757 info->nz_allocated = irecv[1]; 1758 info->nz_unneeded = irecv[2]; 1759 info->memory = irecv[3]; 1760 info->mallocs = irecv[4]; 1761 } 1762 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1763 info->fill_ratio_needed = 0; 1764 info->factor_mallocs = 0; 1765 PetscFunctionReturn(0); 1766 } 1767 1768 #undef __FUNCT__ 1769 #define __FUNCT__ "MatSetOption_MPIAIJ" 1770 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1771 { 1772 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1773 PetscErrorCode ierr; 1774 1775 PetscFunctionBegin; 1776 switch (op) { 1777 case MAT_NEW_NONZERO_LOCATIONS: 1778 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1779 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1780 case MAT_KEEP_NONZERO_PATTERN: 1781 case MAT_NEW_NONZERO_LOCATION_ERR: 1782 case MAT_USE_INODES: 1783 case MAT_IGNORE_ZERO_ENTRIES: 1784 MatCheckPreallocated(A,1); 1785 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1786 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1787 break; 1788 case MAT_ROW_ORIENTED: 1789 a->roworiented = flg; 1790 1791 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1792 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1793 break; 1794 case MAT_NEW_DIAGONALS: 1795 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1796 break; 1797 case MAT_IGNORE_OFF_PROC_ENTRIES: 1798 a->donotstash = flg; 1799 break; 1800 case MAT_SPD: 1801 A->spd_set = PETSC_TRUE; 1802 A->spd = flg; 1803 if (flg) { 1804 A->symmetric = PETSC_TRUE; 1805 A->structurally_symmetric = PETSC_TRUE; 1806 A->symmetric_set = PETSC_TRUE; 1807 A->structurally_symmetric_set = PETSC_TRUE; 1808 } 1809 break; 1810 case MAT_SYMMETRIC: 1811 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1812 break; 1813 case MAT_STRUCTURALLY_SYMMETRIC: 1814 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1815 break; 1816 case MAT_HERMITIAN: 1817 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1818 break; 1819 case MAT_SYMMETRY_ETERNAL: 1820 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1821 break; 1822 default: 1823 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1824 } 1825 PetscFunctionReturn(0); 1826 } 1827 1828 #undef __FUNCT__ 1829 #define __FUNCT__ "MatGetRow_MPIAIJ" 1830 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1831 { 1832 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1833 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1834 PetscErrorCode ierr; 1835 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1836 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1837 PetscInt *cmap,*idx_p; 1838 1839 PetscFunctionBegin; 1840 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1841 mat->getrowactive = PETSC_TRUE; 1842 1843 if (!mat->rowvalues && (idx || v)) { 1844 /* 1845 allocate enough space to hold information from the longest row. 1846 */ 1847 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1848 PetscInt max = 1,tmp; 1849 for (i=0; i<matin->rmap->n; i++) { 1850 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1851 if (max < tmp) max = tmp; 1852 } 1853 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1854 } 1855 1856 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1857 lrow = row - rstart; 1858 1859 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1860 if (!v) {pvA = 0; pvB = 0;} 1861 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1862 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1863 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1864 nztot = nzA + nzB; 1865 1866 cmap = mat->garray; 1867 if (v || idx) { 1868 if (nztot) { 1869 /* Sort by increasing column numbers, assuming A and B already sorted */ 1870 PetscInt imark = -1; 1871 if (v) { 1872 *v = v_p = mat->rowvalues; 1873 for (i=0; i<nzB; i++) { 1874 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1875 else break; 1876 } 1877 imark = i; 1878 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1879 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1880 } 1881 if (idx) { 1882 *idx = idx_p = mat->rowindices; 1883 if (imark > -1) { 1884 for (i=0; i<imark; i++) { 1885 idx_p[i] = cmap[cworkB[i]]; 1886 } 1887 } else { 1888 for (i=0; i<nzB; i++) { 1889 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1890 else break; 1891 } 1892 imark = i; 1893 } 1894 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1895 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1896 } 1897 } else { 1898 if (idx) *idx = 0; 1899 if (v) *v = 0; 1900 } 1901 } 1902 *nz = nztot; 1903 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1904 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1905 PetscFunctionReturn(0); 1906 } 1907 1908 #undef __FUNCT__ 1909 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1910 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1911 { 1912 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1913 1914 PetscFunctionBegin; 1915 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1916 aij->getrowactive = PETSC_FALSE; 1917 PetscFunctionReturn(0); 1918 } 1919 1920 #undef __FUNCT__ 1921 #define __FUNCT__ "MatNorm_MPIAIJ" 1922 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1923 { 1924 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1925 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1926 PetscErrorCode ierr; 1927 PetscInt i,j,cstart = mat->cmap->rstart; 1928 PetscReal sum = 0.0; 1929 MatScalar *v; 1930 1931 PetscFunctionBegin; 1932 if (aij->size == 1) { 1933 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1934 } else { 1935 if (type == NORM_FROBENIUS) { 1936 v = amat->a; 1937 for (i=0; i<amat->nz; i++) { 1938 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1939 } 1940 v = bmat->a; 1941 for (i=0; i<bmat->nz; i++) { 1942 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1943 } 1944 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1945 *norm = PetscSqrtReal(*norm); 1946 } else if (type == NORM_1) { /* max column norm */ 1947 PetscReal *tmp,*tmp2; 1948 PetscInt *jj,*garray = aij->garray; 1949 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1950 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1951 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1952 *norm = 0.0; 1953 v = amat->a; jj = amat->j; 1954 for (j=0; j<amat->nz; j++) { 1955 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1956 } 1957 v = bmat->a; jj = bmat->j; 1958 for (j=0; j<bmat->nz; j++) { 1959 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1960 } 1961 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1962 for (j=0; j<mat->cmap->N; j++) { 1963 if (tmp2[j] > *norm) *norm = tmp2[j]; 1964 } 1965 ierr = PetscFree(tmp);CHKERRQ(ierr); 1966 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1967 } else if (type == NORM_INFINITY) { /* max row norm */ 1968 PetscReal ntemp = 0.0; 1969 for (j=0; j<aij->A->rmap->n; j++) { 1970 v = amat->a + amat->i[j]; 1971 sum = 0.0; 1972 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1973 sum += PetscAbsScalar(*v); v++; 1974 } 1975 v = bmat->a + bmat->i[j]; 1976 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1977 sum += PetscAbsScalar(*v); v++; 1978 } 1979 if (sum > ntemp) ntemp = sum; 1980 } 1981 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1982 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1983 } 1984 PetscFunctionReturn(0); 1985 } 1986 1987 #undef __FUNCT__ 1988 #define __FUNCT__ "MatTranspose_MPIAIJ" 1989 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1990 { 1991 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1992 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1993 PetscErrorCode ierr; 1994 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1995 PetscInt cstart = A->cmap->rstart,ncol; 1996 Mat B; 1997 MatScalar *array; 1998 1999 PetscFunctionBegin; 2000 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2001 2002 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2003 ai = Aloc->i; aj = Aloc->j; 2004 bi = Bloc->i; bj = Bloc->j; 2005 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2006 PetscInt *d_nnz,*g_nnz,*o_nnz; 2007 PetscSFNode *oloc; 2008 PETSC_UNUSED PetscSF sf; 2009 2010 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2011 /* compute d_nnz for preallocation */ 2012 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2013 for (i=0; i<ai[ma]; i++) { 2014 d_nnz[aj[i]]++; 2015 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2016 } 2017 /* compute local off-diagonal contributions */ 2018 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2019 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2020 /* map those to global */ 2021 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2022 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2023 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2024 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2025 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2026 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2027 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2028 2029 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2030 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2031 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2032 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2033 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2034 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2035 } else { 2036 B = *matout; 2037 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2038 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2039 } 2040 2041 /* copy over the A part */ 2042 array = Aloc->a; 2043 row = A->rmap->rstart; 2044 for (i=0; i<ma; i++) { 2045 ncol = ai[i+1]-ai[i]; 2046 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2047 row++; 2048 array += ncol; aj += ncol; 2049 } 2050 aj = Aloc->j; 2051 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2052 2053 /* copy over the B part */ 2054 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2055 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2056 array = Bloc->a; 2057 row = A->rmap->rstart; 2058 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2059 cols_tmp = cols; 2060 for (i=0; i<mb; i++) { 2061 ncol = bi[i+1]-bi[i]; 2062 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2063 row++; 2064 array += ncol; cols_tmp += ncol; 2065 } 2066 ierr = PetscFree(cols);CHKERRQ(ierr); 2067 2068 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2069 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2070 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2071 *matout = B; 2072 } else { 2073 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2074 } 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2080 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2081 { 2082 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2083 Mat a = aij->A,b = aij->B; 2084 PetscErrorCode ierr; 2085 PetscInt s1,s2,s3; 2086 2087 PetscFunctionBegin; 2088 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2089 if (rr) { 2090 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2091 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2092 /* Overlap communication with computation. */ 2093 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2094 } 2095 if (ll) { 2096 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2097 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2098 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2099 } 2100 /* scale the diagonal block */ 2101 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2102 2103 if (rr) { 2104 /* Do a scatter end and then right scale the off-diagonal block */ 2105 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2106 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2107 } 2108 PetscFunctionReturn(0); 2109 } 2110 2111 #undef __FUNCT__ 2112 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2113 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2114 { 2115 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2116 PetscErrorCode ierr; 2117 2118 PetscFunctionBegin; 2119 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2120 PetscFunctionReturn(0); 2121 } 2122 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatEqual_MPIAIJ" 2125 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2126 { 2127 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2128 Mat a,b,c,d; 2129 PetscBool flg; 2130 PetscErrorCode ierr; 2131 2132 PetscFunctionBegin; 2133 a = matA->A; b = matA->B; 2134 c = matB->A; d = matB->B; 2135 2136 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2137 if (flg) { 2138 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2139 } 2140 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2141 PetscFunctionReturn(0); 2142 } 2143 2144 #undef __FUNCT__ 2145 #define __FUNCT__ "MatCopy_MPIAIJ" 2146 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2147 { 2148 PetscErrorCode ierr; 2149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2150 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2151 2152 PetscFunctionBegin; 2153 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2154 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2155 /* because of the column compression in the off-processor part of the matrix a->B, 2156 the number of columns in a->B and b->B may be different, hence we cannot call 2157 the MatCopy() directly on the two parts. If need be, we can provide a more 2158 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2159 then copying the submatrices */ 2160 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2161 } else { 2162 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2163 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2164 } 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatSetUp_MPIAIJ" 2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2171 { 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2181 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2182 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2183 { 2184 PetscInt i,m=Y->rmap->N; 2185 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2186 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2187 const PetscInt *xi = x->i,*yi = y->i; 2188 2189 PetscFunctionBegin; 2190 /* Set the number of nonzeros in the new matrix */ 2191 for (i=0; i<m; i++) { 2192 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2193 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2194 nnz[i] = 0; 2195 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2196 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2197 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2198 nnz[i]++; 2199 } 2200 for (; k<nzy; k++) nnz[i]++; 2201 } 2202 PetscFunctionReturn(0); 2203 } 2204 2205 #undef __FUNCT__ 2206 #define __FUNCT__ "MatAXPY_MPIAIJ" 2207 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2208 { 2209 PetscErrorCode ierr; 2210 PetscInt i; 2211 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2212 PetscBLASInt bnz,one=1; 2213 Mat_SeqAIJ *x,*y; 2214 2215 PetscFunctionBegin; 2216 if (str == SAME_NONZERO_PATTERN) { 2217 PetscScalar alpha = a; 2218 x = (Mat_SeqAIJ*)xx->A->data; 2219 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2220 y = (Mat_SeqAIJ*)yy->A->data; 2221 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2222 x = (Mat_SeqAIJ*)xx->B->data; 2223 y = (Mat_SeqAIJ*)yy->B->data; 2224 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2225 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2226 } else if (str == SUBSET_NONZERO_PATTERN) { 2227 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2228 2229 x = (Mat_SeqAIJ*)xx->B->data; 2230 y = (Mat_SeqAIJ*)yy->B->data; 2231 if (y->xtoy && y->XtoY != xx->B) { 2232 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2233 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2234 } 2235 if (!y->xtoy) { /* get xtoy */ 2236 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2237 y->XtoY = xx->B; 2238 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2239 } 2240 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2241 } else { 2242 Mat B; 2243 PetscInt *nnz_d,*nnz_o; 2244 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2245 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2246 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2247 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2248 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2249 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2250 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2251 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2252 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2253 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2254 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2255 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2256 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2257 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2258 } 2259 PetscFunctionReturn(0); 2260 } 2261 2262 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatConjugate_MPIAIJ" 2266 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2267 { 2268 #if defined(PETSC_USE_COMPLEX) 2269 PetscErrorCode ierr; 2270 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2271 2272 PetscFunctionBegin; 2273 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2274 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2275 #else 2276 PetscFunctionBegin; 2277 #endif 2278 PetscFunctionReturn(0); 2279 } 2280 2281 #undef __FUNCT__ 2282 #define __FUNCT__ "MatRealPart_MPIAIJ" 2283 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2284 { 2285 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2286 PetscErrorCode ierr; 2287 2288 PetscFunctionBegin; 2289 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2290 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2296 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2297 { 2298 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2299 PetscErrorCode ierr; 2300 2301 PetscFunctionBegin; 2302 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2303 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 #if defined(PETSC_HAVE_PBGL) 2308 2309 #include <boost/parallel/mpi/bsp_process_group.hpp> 2310 #include <boost/graph/distributed/ilu_default_graph.hpp> 2311 #include <boost/graph/distributed/ilu_0_block.hpp> 2312 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2313 #include <boost/graph/distributed/petsc/interface.hpp> 2314 #include <boost/multi_array.hpp> 2315 #include <boost/parallel/distributed_property_map->hpp> 2316 2317 #undef __FUNCT__ 2318 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2319 /* 2320 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2321 */ 2322 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2323 { 2324 namespace petsc = boost::distributed::petsc; 2325 2326 namespace graph_dist = boost::graph::distributed; 2327 using boost::graph::distributed::ilu_default::process_group_type; 2328 using boost::graph::ilu_permuted; 2329 2330 PetscBool row_identity, col_identity; 2331 PetscContainer c; 2332 PetscInt m, n, M, N; 2333 PetscErrorCode ierr; 2334 2335 PetscFunctionBegin; 2336 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2337 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2338 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2339 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2340 2341 process_group_type pg; 2342 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2343 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2344 lgraph_type& level_graph = *lgraph_p; 2345 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2346 2347 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2348 ilu_permuted(level_graph); 2349 2350 /* put together the new matrix */ 2351 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2352 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2353 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2354 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2355 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2356 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2357 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2358 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2359 2360 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2361 ierr = PetscContainerSetPointer(c, lgraph_p); 2362 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2363 ierr = PetscContainerDestroy(&c); 2364 PetscFunctionReturn(0); 2365 } 2366 2367 #undef __FUNCT__ 2368 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2369 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2370 { 2371 PetscFunctionBegin; 2372 PetscFunctionReturn(0); 2373 } 2374 2375 #undef __FUNCT__ 2376 #define __FUNCT__ "MatSolve_MPIAIJ" 2377 /* 2378 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2379 */ 2380 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2381 { 2382 namespace graph_dist = boost::graph::distributed; 2383 2384 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2385 lgraph_type *lgraph_p; 2386 PetscContainer c; 2387 PetscErrorCode ierr; 2388 2389 PetscFunctionBegin; 2390 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2391 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2392 ierr = VecCopy(b, x);CHKERRQ(ierr); 2393 2394 PetscScalar *array_x; 2395 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2396 PetscInt sx; 2397 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2398 2399 PetscScalar *array_b; 2400 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2401 PetscInt sb; 2402 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2403 2404 lgraph_type& level_graph = *lgraph_p; 2405 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2406 2407 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2408 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2409 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2410 2411 typedef boost::iterator_property_map<array_ref_type::iterator, 2412 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2413 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2414 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2415 2416 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2417 PetscFunctionReturn(0); 2418 } 2419 #endif 2420 2421 #undef __FUNCT__ 2422 #define __FUNCT__ "MatDestroy_MatRedundant" 2423 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2424 { 2425 PetscErrorCode ierr; 2426 Mat_Redundant *redund; 2427 PetscInt i; 2428 PetscMPIInt size; 2429 2430 PetscFunctionBegin; 2431 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2432 if (size == 1) { 2433 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2434 redund = a->redundant; 2435 } else { 2436 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2437 redund = a->redundant; 2438 } 2439 if (redund){ 2440 if (redund->matseq) { /* via MatGetSubMatrices() */ 2441 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2442 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2443 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2444 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2445 } else { 2446 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2447 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2448 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2449 for (i=0; i<redund->nrecvs; i++) { 2450 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2451 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2452 } 2453 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2454 } 2455 2456 if (redund->psubcomm) { 2457 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2458 } 2459 ierr = redund->Destroy(A);CHKERRQ(ierr); 2460 ierr = PetscFree(redund);CHKERRQ(ierr); 2461 } 2462 PetscFunctionReturn(0); 2463 } 2464 2465 #undef __FUNCT__ 2466 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2467 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2468 { 2469 PetscMPIInt rank,size; 2470 MPI_Comm comm; 2471 PetscErrorCode ierr; 2472 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2473 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2474 PetscInt *rowrange = mat->rmap->range; 2475 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2476 Mat A = aij->A,B=aij->B,C=*matredundant; 2477 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2478 PetscScalar *sbuf_a; 2479 PetscInt nzlocal=a->nz+b->nz; 2480 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2481 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2482 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2483 MatScalar *aworkA,*aworkB; 2484 PetscScalar *vals; 2485 PetscMPIInt tag1,tag2,tag3,imdex; 2486 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2487 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2488 MPI_Status recv_status,*send_status; 2489 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2490 PetscInt **rbuf_j=NULL; 2491 PetscScalar **rbuf_a=NULL; 2492 Mat_Redundant *redund =NULL; 2493 2494 PetscFunctionBegin; 2495 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2496 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2497 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2498 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2499 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2500 2501 if (reuse == MAT_REUSE_MATRIX) { 2502 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2503 if (subsize == 1) { 2504 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2505 redund = c->redundant; 2506 } else { 2507 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2508 redund = c->redundant; 2509 } 2510 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2511 2512 nsends = redund->nsends; 2513 nrecvs = redund->nrecvs; 2514 send_rank = redund->send_rank; 2515 recv_rank = redund->recv_rank; 2516 sbuf_nz = redund->sbuf_nz; 2517 rbuf_nz = redund->rbuf_nz; 2518 sbuf_j = redund->sbuf_j; 2519 sbuf_a = redund->sbuf_a; 2520 rbuf_j = redund->rbuf_j; 2521 rbuf_a = redund->rbuf_a; 2522 } 2523 2524 if (reuse == MAT_INITIAL_MATRIX) { 2525 PetscInt nleftover,np_subcomm; 2526 2527 /* get the destination processors' id send_rank, nsends and nrecvs */ 2528 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2529 2530 np_subcomm = size/nsubcomm; 2531 nleftover = size - nsubcomm*np_subcomm; 2532 2533 /* block of codes below is specific for INTERLACED */ 2534 /* ------------------------------------------------*/ 2535 nsends = 0; nrecvs = 0; 2536 for (i=0; i<size; i++) { 2537 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2538 send_rank[nsends++] = i; 2539 recv_rank[nrecvs++] = i; 2540 } 2541 } 2542 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2543 i = size-nleftover-1; 2544 j = 0; 2545 while (j < nsubcomm - nleftover) { 2546 send_rank[nsends++] = i; 2547 i--; j++; 2548 } 2549 } 2550 2551 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2552 for (i=0; i<nleftover; i++) { 2553 recv_rank[nrecvs++] = size-nleftover+i; 2554 } 2555 } 2556 /*----------------------------------------------*/ 2557 2558 /* allocate sbuf_j, sbuf_a */ 2559 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2560 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2561 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2562 /* 2563 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2564 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 2565 */ 2566 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2567 2568 /* copy mat's local entries into the buffers */ 2569 if (reuse == MAT_INITIAL_MATRIX) { 2570 rownz_max = 0; 2571 rptr = sbuf_j; 2572 cols = sbuf_j + rend-rstart + 1; 2573 vals = sbuf_a; 2574 rptr[0] = 0; 2575 for (i=0; i<rend-rstart; i++) { 2576 row = i + rstart; 2577 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2578 ncols = nzA + nzB; 2579 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2580 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2581 /* load the column indices for this row into cols */ 2582 lwrite = 0; 2583 for (l=0; l<nzB; l++) { 2584 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2585 vals[lwrite] = aworkB[l]; 2586 cols[lwrite++] = ctmp; 2587 } 2588 } 2589 for (l=0; l<nzA; l++) { 2590 vals[lwrite] = aworkA[l]; 2591 cols[lwrite++] = cstart + cworkA[l]; 2592 } 2593 for (l=0; l<nzB; l++) { 2594 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2595 vals[lwrite] = aworkB[l]; 2596 cols[lwrite++] = ctmp; 2597 } 2598 } 2599 vals += ncols; 2600 cols += ncols; 2601 rptr[i+1] = rptr[i] + ncols; 2602 if (rownz_max < ncols) rownz_max = ncols; 2603 } 2604 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2605 } else { /* only copy matrix values into sbuf_a */ 2606 rptr = sbuf_j; 2607 vals = sbuf_a; 2608 rptr[0] = 0; 2609 for (i=0; i<rend-rstart; i++) { 2610 row = i + rstart; 2611 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2612 ncols = nzA + nzB; 2613 cworkB = b->j + b->i[i]; 2614 aworkA = a->a + a->i[i]; 2615 aworkB = b->a + b->i[i]; 2616 lwrite = 0; 2617 for (l=0; l<nzB; l++) { 2618 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2619 } 2620 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2621 for (l=0; l<nzB; l++) { 2622 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2623 } 2624 vals += ncols; 2625 rptr[i+1] = rptr[i] + ncols; 2626 } 2627 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2628 2629 /* send nzlocal to others, and recv other's nzlocal */ 2630 /*--------------------------------------------------*/ 2631 if (reuse == MAT_INITIAL_MATRIX) { 2632 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2633 2634 s_waits2 = s_waits3 + nsends; 2635 s_waits1 = s_waits2 + nsends; 2636 r_waits1 = s_waits1 + nsends; 2637 r_waits2 = r_waits1 + nrecvs; 2638 r_waits3 = r_waits2 + nrecvs; 2639 } else { 2640 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2641 2642 r_waits3 = s_waits3 + nsends; 2643 } 2644 2645 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2646 if (reuse == MAT_INITIAL_MATRIX) { 2647 /* get new tags to keep the communication clean */ 2648 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2649 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2650 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2651 2652 /* post receives of other's nzlocal */ 2653 for (i=0; i<nrecvs; i++) { 2654 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2655 } 2656 /* send nzlocal to others */ 2657 for (i=0; i<nsends; i++) { 2658 sbuf_nz[i] = nzlocal; 2659 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2660 } 2661 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2662 count = nrecvs; 2663 while (count) { 2664 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2665 2666 recv_rank[imdex] = recv_status.MPI_SOURCE; 2667 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2668 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2669 2670 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2671 2672 rbuf_nz[imdex] += i + 2; 2673 2674 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2675 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2676 count--; 2677 } 2678 /* wait on sends of nzlocal */ 2679 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2680 /* send mat->i,j to others, and recv from other's */ 2681 /*------------------------------------------------*/ 2682 for (i=0; i<nsends; i++) { 2683 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2684 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2685 } 2686 /* wait on receives of mat->i,j */ 2687 /*------------------------------*/ 2688 count = nrecvs; 2689 while (count) { 2690 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2691 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2692 count--; 2693 } 2694 /* wait on sends of mat->i,j */ 2695 /*---------------------------*/ 2696 if (nsends) { 2697 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2698 } 2699 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2700 2701 /* post receives, send and receive mat->a */ 2702 /*----------------------------------------*/ 2703 for (imdex=0; imdex<nrecvs; imdex++) { 2704 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2705 } 2706 for (i=0; i<nsends; i++) { 2707 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2708 } 2709 count = nrecvs; 2710 while (count) { 2711 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2712 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2713 count--; 2714 } 2715 if (nsends) { 2716 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2717 } 2718 2719 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2720 2721 /* create redundant matrix */ 2722 /*-------------------------*/ 2723 if (reuse == MAT_INITIAL_MATRIX) { 2724 const PetscInt *range; 2725 PetscInt rstart_sub,rend_sub,mloc_sub; 2726 2727 /* compute rownz_max for preallocation */ 2728 for (imdex=0; imdex<nrecvs; imdex++) { 2729 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2730 rptr = rbuf_j[imdex]; 2731 for (i=0; i<j; i++) { 2732 ncols = rptr[i+1] - rptr[i]; 2733 if (rownz_max < ncols) rownz_max = ncols; 2734 } 2735 } 2736 2737 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2738 2739 /* get local size of redundant matrix 2740 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2741 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2742 rstart_sub = range[nsubcomm*subrank]; 2743 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2744 rend_sub = range[nsubcomm*(subrank+1)]; 2745 } else { 2746 rend_sub = mat->rmap->N; 2747 } 2748 mloc_sub = rend_sub - rstart_sub; 2749 2750 if (M == N) { 2751 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2752 } else { /* non-square matrix */ 2753 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2754 } 2755 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2756 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2757 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2758 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2759 } else { 2760 C = *matredundant; 2761 } 2762 2763 /* insert local matrix entries */ 2764 rptr = sbuf_j; 2765 cols = sbuf_j + rend-rstart + 1; 2766 vals = sbuf_a; 2767 for (i=0; i<rend-rstart; i++) { 2768 row = i + rstart; 2769 ncols = rptr[i+1] - rptr[i]; 2770 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2771 vals += ncols; 2772 cols += ncols; 2773 } 2774 /* insert received matrix entries */ 2775 for (imdex=0; imdex<nrecvs; imdex++) { 2776 rstart = rowrange[recv_rank[imdex]]; 2777 rend = rowrange[recv_rank[imdex]+1]; 2778 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2779 rptr = rbuf_j[imdex]; 2780 cols = rbuf_j[imdex] + rend-rstart + 1; 2781 vals = rbuf_a[imdex]; 2782 for (i=0; i<rend-rstart; i++) { 2783 row = i + rstart; 2784 ncols = rptr[i+1] - rptr[i]; 2785 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2786 vals += ncols; 2787 cols += ncols; 2788 } 2789 } 2790 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2791 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2792 2793 if (reuse == MAT_INITIAL_MATRIX) { 2794 *matredundant = C; 2795 2796 /* create a supporting struct and attach it to C for reuse */ 2797 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2798 if (subsize == 1) { 2799 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2800 c->redundant = redund; 2801 } else { 2802 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2803 c->redundant = redund; 2804 } 2805 2806 redund->nzlocal = nzlocal; 2807 redund->nsends = nsends; 2808 redund->nrecvs = nrecvs; 2809 redund->send_rank = send_rank; 2810 redund->recv_rank = recv_rank; 2811 redund->sbuf_nz = sbuf_nz; 2812 redund->rbuf_nz = rbuf_nz; 2813 redund->sbuf_j = sbuf_j; 2814 redund->sbuf_a = sbuf_a; 2815 redund->rbuf_j = rbuf_j; 2816 redund->rbuf_a = rbuf_a; 2817 redund->psubcomm = NULL; 2818 2819 redund->Destroy = C->ops->destroy; 2820 C->ops->destroy = MatDestroy_MatRedundant; 2821 } 2822 PetscFunctionReturn(0); 2823 } 2824 2825 #undef __FUNCT__ 2826 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2827 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2828 { 2829 PetscErrorCode ierr; 2830 MPI_Comm comm; 2831 PetscMPIInt size,subsize; 2832 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2833 Mat_Redundant *redund=NULL; 2834 PetscSubcomm psubcomm=NULL; 2835 MPI_Comm subcomm_in=subcomm; 2836 Mat *matseq; 2837 IS isrow,iscol; 2838 2839 PetscFunctionBegin; 2840 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2841 if (reuse == MAT_INITIAL_MATRIX) { 2842 /* create psubcomm, then get subcomm */ 2843 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2844 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2845 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2846 2847 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2848 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2849 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2850 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2851 subcomm = psubcomm->comm; 2852 } else { /* retrieve psubcomm and subcomm */ 2853 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2854 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2855 if (subsize == 1) { 2856 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2857 redund = c->redundant; 2858 } else { 2859 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2860 redund = c->redundant; 2861 } 2862 psubcomm = redund->psubcomm; 2863 } 2864 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2865 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2866 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 2867 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2868 if (subsize == 1) { 2869 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2870 c->redundant->psubcomm = psubcomm; 2871 } else { 2872 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2873 c->redundant->psubcomm = psubcomm ; 2874 } 2875 } 2876 PetscFunctionReturn(0); 2877 } 2878 } 2879 2880 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2881 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2882 if (reuse == MAT_INITIAL_MATRIX) { 2883 /* create a local sequential matrix matseq[0] */ 2884 mloc_sub = PETSC_DECIDE; 2885 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2886 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2887 rstart = rend - mloc_sub; 2888 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2889 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2890 } else { /* reuse == MAT_REUSE_MATRIX */ 2891 if (subsize == 1) { 2892 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2893 redund = c->redundant; 2894 } else { 2895 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2896 redund = c->redundant; 2897 } 2898 2899 isrow = redund->isrow; 2900 iscol = redund->iscol; 2901 matseq = redund->matseq; 2902 } 2903 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2904 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2905 2906 if (reuse == MAT_INITIAL_MATRIX) { 2907 /* create a supporting struct and attach it to C for reuse */ 2908 ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr); 2909 if (subsize == 1) { 2910 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2911 c->redundant = redund; 2912 } else { 2913 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2914 c->redundant = redund; 2915 } 2916 redund->isrow = isrow; 2917 redund->iscol = iscol; 2918 redund->matseq = matseq; 2919 redund->psubcomm = psubcomm; 2920 redund->Destroy = (*matredundant)->ops->destroy; 2921 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 2922 } 2923 PetscFunctionReturn(0); 2924 } 2925 2926 #undef __FUNCT__ 2927 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2928 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2929 { 2930 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2931 PetscErrorCode ierr; 2932 PetscInt i,*idxb = 0; 2933 PetscScalar *va,*vb; 2934 Vec vtmp; 2935 2936 PetscFunctionBegin; 2937 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2938 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2939 if (idx) { 2940 for (i=0; i<A->rmap->n; i++) { 2941 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2942 } 2943 } 2944 2945 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2946 if (idx) { 2947 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2948 } 2949 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2950 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2951 2952 for (i=0; i<A->rmap->n; i++) { 2953 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2954 va[i] = vb[i]; 2955 if (idx) idx[i] = a->garray[idxb[i]]; 2956 } 2957 } 2958 2959 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2960 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2961 ierr = PetscFree(idxb);CHKERRQ(ierr); 2962 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2963 PetscFunctionReturn(0); 2964 } 2965 2966 #undef __FUNCT__ 2967 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2968 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2969 { 2970 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2971 PetscErrorCode ierr; 2972 PetscInt i,*idxb = 0; 2973 PetscScalar *va,*vb; 2974 Vec vtmp; 2975 2976 PetscFunctionBegin; 2977 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2978 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2979 if (idx) { 2980 for (i=0; i<A->cmap->n; i++) { 2981 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2982 } 2983 } 2984 2985 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2986 if (idx) { 2987 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2988 } 2989 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2990 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2991 2992 for (i=0; i<A->rmap->n; i++) { 2993 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2994 va[i] = vb[i]; 2995 if (idx) idx[i] = a->garray[idxb[i]]; 2996 } 2997 } 2998 2999 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3000 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3001 ierr = PetscFree(idxb);CHKERRQ(ierr); 3002 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3003 PetscFunctionReturn(0); 3004 } 3005 3006 #undef __FUNCT__ 3007 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 3008 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3009 { 3010 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3011 PetscInt n = A->rmap->n; 3012 PetscInt cstart = A->cmap->rstart; 3013 PetscInt *cmap = mat->garray; 3014 PetscInt *diagIdx, *offdiagIdx; 3015 Vec diagV, offdiagV; 3016 PetscScalar *a, *diagA, *offdiagA; 3017 PetscInt r; 3018 PetscErrorCode ierr; 3019 3020 PetscFunctionBegin; 3021 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3022 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 3023 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 3024 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3025 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3026 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3027 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3028 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3029 for (r = 0; r < n; ++r) { 3030 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3031 a[r] = diagA[r]; 3032 idx[r] = cstart + diagIdx[r]; 3033 } else { 3034 a[r] = offdiagA[r]; 3035 idx[r] = cmap[offdiagIdx[r]]; 3036 } 3037 } 3038 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3039 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3040 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3041 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3042 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3043 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3044 PetscFunctionReturn(0); 3045 } 3046 3047 #undef __FUNCT__ 3048 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3049 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3050 { 3051 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3052 PetscInt n = A->rmap->n; 3053 PetscInt cstart = A->cmap->rstart; 3054 PetscInt *cmap = mat->garray; 3055 PetscInt *diagIdx, *offdiagIdx; 3056 Vec diagV, offdiagV; 3057 PetscScalar *a, *diagA, *offdiagA; 3058 PetscInt r; 3059 PetscErrorCode ierr; 3060 3061 PetscFunctionBegin; 3062 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3063 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3064 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3065 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3066 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3067 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3068 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3069 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3070 for (r = 0; r < n; ++r) { 3071 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3072 a[r] = diagA[r]; 3073 idx[r] = cstart + diagIdx[r]; 3074 } else { 3075 a[r] = offdiagA[r]; 3076 idx[r] = cmap[offdiagIdx[r]]; 3077 } 3078 } 3079 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3080 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3081 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3082 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3083 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3084 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3085 PetscFunctionReturn(0); 3086 } 3087 3088 #undef __FUNCT__ 3089 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3090 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3091 { 3092 PetscErrorCode ierr; 3093 Mat *dummy; 3094 3095 PetscFunctionBegin; 3096 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3097 *newmat = *dummy; 3098 ierr = PetscFree(dummy);CHKERRQ(ierr); 3099 PetscFunctionReturn(0); 3100 } 3101 3102 #undef __FUNCT__ 3103 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3104 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3105 { 3106 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3107 PetscErrorCode ierr; 3108 3109 PetscFunctionBegin; 3110 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3111 PetscFunctionReturn(0); 3112 } 3113 3114 #undef __FUNCT__ 3115 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3116 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3117 { 3118 PetscErrorCode ierr; 3119 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3120 3121 PetscFunctionBegin; 3122 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3123 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3124 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3125 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3126 PetscFunctionReturn(0); 3127 } 3128 3129 /* -------------------------------------------------------------------*/ 3130 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3131 MatGetRow_MPIAIJ, 3132 MatRestoreRow_MPIAIJ, 3133 MatMult_MPIAIJ, 3134 /* 4*/ MatMultAdd_MPIAIJ, 3135 MatMultTranspose_MPIAIJ, 3136 MatMultTransposeAdd_MPIAIJ, 3137 #if defined(PETSC_HAVE_PBGL) 3138 MatSolve_MPIAIJ, 3139 #else 3140 0, 3141 #endif 3142 0, 3143 0, 3144 /*10*/ 0, 3145 0, 3146 0, 3147 MatSOR_MPIAIJ, 3148 MatTranspose_MPIAIJ, 3149 /*15*/ MatGetInfo_MPIAIJ, 3150 MatEqual_MPIAIJ, 3151 MatGetDiagonal_MPIAIJ, 3152 MatDiagonalScale_MPIAIJ, 3153 MatNorm_MPIAIJ, 3154 /*20*/ MatAssemblyBegin_MPIAIJ, 3155 MatAssemblyEnd_MPIAIJ, 3156 MatSetOption_MPIAIJ, 3157 MatZeroEntries_MPIAIJ, 3158 /*24*/ MatZeroRows_MPIAIJ, 3159 0, 3160 #if defined(PETSC_HAVE_PBGL) 3161 0, 3162 #else 3163 0, 3164 #endif 3165 0, 3166 0, 3167 /*29*/ MatSetUp_MPIAIJ, 3168 #if defined(PETSC_HAVE_PBGL) 3169 0, 3170 #else 3171 0, 3172 #endif 3173 0, 3174 0, 3175 0, 3176 /*34*/ MatDuplicate_MPIAIJ, 3177 0, 3178 0, 3179 0, 3180 0, 3181 /*39*/ MatAXPY_MPIAIJ, 3182 MatGetSubMatrices_MPIAIJ, 3183 MatIncreaseOverlap_MPIAIJ, 3184 MatGetValues_MPIAIJ, 3185 MatCopy_MPIAIJ, 3186 /*44*/ MatGetRowMax_MPIAIJ, 3187 MatScale_MPIAIJ, 3188 0, 3189 0, 3190 MatZeroRowsColumns_MPIAIJ, 3191 /*49*/ MatSetRandom_MPIAIJ, 3192 0, 3193 0, 3194 0, 3195 0, 3196 /*54*/ MatFDColoringCreate_MPIXAIJ, 3197 0, 3198 MatSetUnfactored_MPIAIJ, 3199 MatPermute_MPIAIJ, 3200 0, 3201 /*59*/ MatGetSubMatrix_MPIAIJ, 3202 MatDestroy_MPIAIJ, 3203 MatView_MPIAIJ, 3204 0, 3205 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3206 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3207 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3208 0, 3209 0, 3210 0, 3211 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3212 MatGetRowMinAbs_MPIAIJ, 3213 0, 3214 MatSetColoring_MPIAIJ, 3215 0, 3216 MatSetValuesAdifor_MPIAIJ, 3217 /*75*/ MatFDColoringApply_AIJ, 3218 0, 3219 0, 3220 0, 3221 MatFindZeroDiagonals_MPIAIJ, 3222 /*80*/ 0, 3223 0, 3224 0, 3225 /*83*/ MatLoad_MPIAIJ, 3226 0, 3227 0, 3228 0, 3229 0, 3230 0, 3231 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3232 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3233 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3234 MatPtAP_MPIAIJ_MPIAIJ, 3235 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3236 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3237 0, 3238 0, 3239 0, 3240 0, 3241 /*99*/ 0, 3242 0, 3243 0, 3244 MatConjugate_MPIAIJ, 3245 0, 3246 /*104*/MatSetValuesRow_MPIAIJ, 3247 MatRealPart_MPIAIJ, 3248 MatImaginaryPart_MPIAIJ, 3249 0, 3250 0, 3251 /*109*/0, 3252 MatGetRedundantMatrix_MPIAIJ, 3253 MatGetRowMin_MPIAIJ, 3254 0, 3255 0, 3256 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3257 0, 3258 0, 3259 0, 3260 0, 3261 /*119*/0, 3262 0, 3263 0, 3264 0, 3265 MatGetMultiProcBlock_MPIAIJ, 3266 /*124*/MatFindNonzeroRows_MPIAIJ, 3267 MatGetColumnNorms_MPIAIJ, 3268 MatInvertBlockDiagonal_MPIAIJ, 3269 0, 3270 MatGetSubMatricesParallel_MPIAIJ, 3271 /*129*/0, 3272 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3273 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3274 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3275 0, 3276 /*134*/0, 3277 0, 3278 0, 3279 0, 3280 0, 3281 /*139*/0, 3282 0, 3283 0, 3284 MatFDColoringSetUp_MPIXAIJ 3285 }; 3286 3287 /* ----------------------------------------------------------------------------------------*/ 3288 3289 #undef __FUNCT__ 3290 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3291 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3292 { 3293 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3294 PetscErrorCode ierr; 3295 3296 PetscFunctionBegin; 3297 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3298 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3299 PetscFunctionReturn(0); 3300 } 3301 3302 #undef __FUNCT__ 3303 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3304 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3305 { 3306 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3307 PetscErrorCode ierr; 3308 3309 PetscFunctionBegin; 3310 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3311 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3312 PetscFunctionReturn(0); 3313 } 3314 3315 #undef __FUNCT__ 3316 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3317 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3318 { 3319 Mat_MPIAIJ *b; 3320 PetscErrorCode ierr; 3321 3322 PetscFunctionBegin; 3323 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3324 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3325 b = (Mat_MPIAIJ*)B->data; 3326 3327 if (!B->preallocated) { 3328 /* Explicitly create 2 MATSEQAIJ matrices. */ 3329 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3330 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3331 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3332 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3333 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3334 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3335 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3336 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3337 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3338 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3339 } 3340 3341 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3342 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3343 B->preallocated = PETSC_TRUE; 3344 PetscFunctionReturn(0); 3345 } 3346 3347 #undef __FUNCT__ 3348 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3349 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3350 { 3351 Mat mat; 3352 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3353 PetscErrorCode ierr; 3354 3355 PetscFunctionBegin; 3356 *newmat = 0; 3357 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3358 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3359 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3360 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3361 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3362 a = (Mat_MPIAIJ*)mat->data; 3363 3364 mat->factortype = matin->factortype; 3365 mat->rmap->bs = matin->rmap->bs; 3366 mat->cmap->bs = matin->cmap->bs; 3367 mat->assembled = PETSC_TRUE; 3368 mat->insertmode = NOT_SET_VALUES; 3369 mat->preallocated = PETSC_TRUE; 3370 3371 a->size = oldmat->size; 3372 a->rank = oldmat->rank; 3373 a->donotstash = oldmat->donotstash; 3374 a->roworiented = oldmat->roworiented; 3375 a->rowindices = 0; 3376 a->rowvalues = 0; 3377 a->getrowactive = PETSC_FALSE; 3378 3379 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3380 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3381 3382 if (oldmat->colmap) { 3383 #if defined(PETSC_USE_CTABLE) 3384 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3385 #else 3386 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3387 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3388 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3389 #endif 3390 } else a->colmap = 0; 3391 if (oldmat->garray) { 3392 PetscInt len; 3393 len = oldmat->B->cmap->n; 3394 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3395 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3396 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3397 } else a->garray = 0; 3398 3399 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3400 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3401 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3402 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3403 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3404 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3405 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3406 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3407 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3408 *newmat = mat; 3409 PetscFunctionReturn(0); 3410 } 3411 3412 3413 3414 #undef __FUNCT__ 3415 #define __FUNCT__ "MatLoad_MPIAIJ" 3416 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3417 { 3418 PetscScalar *vals,*svals; 3419 MPI_Comm comm; 3420 PetscErrorCode ierr; 3421 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3422 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3423 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3424 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3425 PetscInt cend,cstart,n,*rowners,sizesset=1; 3426 int fd; 3427 PetscInt bs = 1; 3428 3429 PetscFunctionBegin; 3430 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3431 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3432 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3433 if (!rank) { 3434 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3435 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3436 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3437 } 3438 3439 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3440 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3441 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3442 3443 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3444 3445 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3446 M = header[1]; N = header[2]; 3447 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3448 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3449 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3450 3451 /* If global sizes are set, check if they are consistent with that given in the file */ 3452 if (sizesset) { 3453 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3454 } 3455 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3456 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3457 3458 /* determine ownership of all (block) rows */ 3459 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3460 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3461 else m = newMat->rmap->n; /* Set by user */ 3462 3463 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3464 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3465 3466 /* First process needs enough room for process with most rows */ 3467 if (!rank) { 3468 mmax = rowners[1]; 3469 for (i=2; i<=size; i++) { 3470 mmax = PetscMax(mmax, rowners[i]); 3471 } 3472 } else mmax = -1; /* unused, but compilers complain */ 3473 3474 rowners[0] = 0; 3475 for (i=2; i<=size; i++) { 3476 rowners[i] += rowners[i-1]; 3477 } 3478 rstart = rowners[rank]; 3479 rend = rowners[rank+1]; 3480 3481 /* distribute row lengths to all processors */ 3482 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3483 if (!rank) { 3484 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3485 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3486 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3487 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3488 for (j=0; j<m; j++) { 3489 procsnz[0] += ourlens[j]; 3490 } 3491 for (i=1; i<size; i++) { 3492 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3493 /* calculate the number of nonzeros on each processor */ 3494 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3495 procsnz[i] += rowlengths[j]; 3496 } 3497 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3498 } 3499 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3500 } else { 3501 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3502 } 3503 3504 if (!rank) { 3505 /* determine max buffer needed and allocate it */ 3506 maxnz = 0; 3507 for (i=0; i<size; i++) { 3508 maxnz = PetscMax(maxnz,procsnz[i]); 3509 } 3510 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3511 3512 /* read in my part of the matrix column indices */ 3513 nz = procsnz[0]; 3514 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3515 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3516 3517 /* read in every one elses and ship off */ 3518 for (i=1; i<size; i++) { 3519 nz = procsnz[i]; 3520 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3521 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3522 } 3523 ierr = PetscFree(cols);CHKERRQ(ierr); 3524 } else { 3525 /* determine buffer space needed for message */ 3526 nz = 0; 3527 for (i=0; i<m; i++) { 3528 nz += ourlens[i]; 3529 } 3530 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3531 3532 /* receive message of column indices*/ 3533 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3534 } 3535 3536 /* determine column ownership if matrix is not square */ 3537 if (N != M) { 3538 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3539 else n = newMat->cmap->n; 3540 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3541 cstart = cend - n; 3542 } else { 3543 cstart = rstart; 3544 cend = rend; 3545 n = cend - cstart; 3546 } 3547 3548 /* loop over local rows, determining number of off diagonal entries */ 3549 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3550 jj = 0; 3551 for (i=0; i<m; i++) { 3552 for (j=0; j<ourlens[i]; j++) { 3553 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3554 jj++; 3555 } 3556 } 3557 3558 for (i=0; i<m; i++) { 3559 ourlens[i] -= offlens[i]; 3560 } 3561 if (!sizesset) { 3562 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3563 } 3564 3565 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3566 3567 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3568 3569 for (i=0; i<m; i++) { 3570 ourlens[i] += offlens[i]; 3571 } 3572 3573 if (!rank) { 3574 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3575 3576 /* read in my part of the matrix numerical values */ 3577 nz = procsnz[0]; 3578 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3579 3580 /* insert into matrix */ 3581 jj = rstart; 3582 smycols = mycols; 3583 svals = vals; 3584 for (i=0; i<m; i++) { 3585 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3586 smycols += ourlens[i]; 3587 svals += ourlens[i]; 3588 jj++; 3589 } 3590 3591 /* read in other processors and ship out */ 3592 for (i=1; i<size; i++) { 3593 nz = procsnz[i]; 3594 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3595 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3596 } 3597 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3598 } else { 3599 /* receive numeric values */ 3600 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3601 3602 /* receive message of values*/ 3603 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3604 3605 /* insert into matrix */ 3606 jj = rstart; 3607 smycols = mycols; 3608 svals = vals; 3609 for (i=0; i<m; i++) { 3610 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3611 smycols += ourlens[i]; 3612 svals += ourlens[i]; 3613 jj++; 3614 } 3615 } 3616 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3617 ierr = PetscFree(vals);CHKERRQ(ierr); 3618 ierr = PetscFree(mycols);CHKERRQ(ierr); 3619 ierr = PetscFree(rowners);CHKERRQ(ierr); 3620 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3621 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3622 PetscFunctionReturn(0); 3623 } 3624 3625 #undef __FUNCT__ 3626 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3627 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3628 { 3629 PetscErrorCode ierr; 3630 IS iscol_local; 3631 PetscInt csize; 3632 3633 PetscFunctionBegin; 3634 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3635 if (call == MAT_REUSE_MATRIX) { 3636 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3637 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3638 } else { 3639 PetscInt cbs; 3640 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3641 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3642 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3643 } 3644 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3645 if (call == MAT_INITIAL_MATRIX) { 3646 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3647 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3648 } 3649 PetscFunctionReturn(0); 3650 } 3651 3652 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3653 #undef __FUNCT__ 3654 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3655 /* 3656 Not great since it makes two copies of the submatrix, first an SeqAIJ 3657 in local and then by concatenating the local matrices the end result. 3658 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3659 3660 Note: This requires a sequential iscol with all indices. 3661 */ 3662 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3663 { 3664 PetscErrorCode ierr; 3665 PetscMPIInt rank,size; 3666 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3667 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3668 PetscBool allcolumns, colflag; 3669 Mat M,Mreuse; 3670 MatScalar *vwork,*aa; 3671 MPI_Comm comm; 3672 Mat_SeqAIJ *aij; 3673 3674 PetscFunctionBegin; 3675 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3676 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3677 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3678 3679 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3680 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3681 if (colflag && ncol == mat->cmap->N) { 3682 allcolumns = PETSC_TRUE; 3683 } else { 3684 allcolumns = PETSC_FALSE; 3685 } 3686 if (call == MAT_REUSE_MATRIX) { 3687 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3688 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3689 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3690 } else { 3691 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3692 } 3693 3694 /* 3695 m - number of local rows 3696 n - number of columns (same on all processors) 3697 rstart - first row in new global matrix generated 3698 */ 3699 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3700 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3701 if (call == MAT_INITIAL_MATRIX) { 3702 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3703 ii = aij->i; 3704 jj = aij->j; 3705 3706 /* 3707 Determine the number of non-zeros in the diagonal and off-diagonal 3708 portions of the matrix in order to do correct preallocation 3709 */ 3710 3711 /* first get start and end of "diagonal" columns */ 3712 if (csize == PETSC_DECIDE) { 3713 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3714 if (mglobal == n) { /* square matrix */ 3715 nlocal = m; 3716 } else { 3717 nlocal = n/size + ((n % size) > rank); 3718 } 3719 } else { 3720 nlocal = csize; 3721 } 3722 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3723 rstart = rend - nlocal; 3724 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3725 3726 /* next, compute all the lengths */ 3727 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3728 olens = dlens + m; 3729 for (i=0; i<m; i++) { 3730 jend = ii[i+1] - ii[i]; 3731 olen = 0; 3732 dlen = 0; 3733 for (j=0; j<jend; j++) { 3734 if (*jj < rstart || *jj >= rend) olen++; 3735 else dlen++; 3736 jj++; 3737 } 3738 olens[i] = olen; 3739 dlens[i] = dlen; 3740 } 3741 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3742 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3743 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3744 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3745 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3746 ierr = PetscFree(dlens);CHKERRQ(ierr); 3747 } else { 3748 PetscInt ml,nl; 3749 3750 M = *newmat; 3751 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3752 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3753 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3754 /* 3755 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3756 rather than the slower MatSetValues(). 3757 */ 3758 M->was_assembled = PETSC_TRUE; 3759 M->assembled = PETSC_FALSE; 3760 } 3761 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3762 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3763 ii = aij->i; 3764 jj = aij->j; 3765 aa = aij->a; 3766 for (i=0; i<m; i++) { 3767 row = rstart + i; 3768 nz = ii[i+1] - ii[i]; 3769 cwork = jj; jj += nz; 3770 vwork = aa; aa += nz; 3771 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3772 } 3773 3774 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3775 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3776 *newmat = M; 3777 3778 /* save submatrix used in processor for next request */ 3779 if (call == MAT_INITIAL_MATRIX) { 3780 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3781 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3782 } 3783 PetscFunctionReturn(0); 3784 } 3785 3786 #undef __FUNCT__ 3787 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3788 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3789 { 3790 PetscInt m,cstart, cend,j,nnz,i,d; 3791 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3792 const PetscInt *JJ; 3793 PetscScalar *values; 3794 PetscErrorCode ierr; 3795 3796 PetscFunctionBegin; 3797 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3798 3799 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3800 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3801 m = B->rmap->n; 3802 cstart = B->cmap->rstart; 3803 cend = B->cmap->rend; 3804 rstart = B->rmap->rstart; 3805 3806 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3807 3808 #if defined(PETSC_USE_DEBUGGING) 3809 for (i=0; i<m; i++) { 3810 nnz = Ii[i+1]- Ii[i]; 3811 JJ = J + Ii[i]; 3812 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3813 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3814 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3815 } 3816 #endif 3817 3818 for (i=0; i<m; i++) { 3819 nnz = Ii[i+1]- Ii[i]; 3820 JJ = J + Ii[i]; 3821 nnz_max = PetscMax(nnz_max,nnz); 3822 d = 0; 3823 for (j=0; j<nnz; j++) { 3824 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3825 } 3826 d_nnz[i] = d; 3827 o_nnz[i] = nnz - d; 3828 } 3829 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3830 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3831 3832 if (v) values = (PetscScalar*)v; 3833 else { 3834 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3835 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3836 } 3837 3838 for (i=0; i<m; i++) { 3839 ii = i + rstart; 3840 nnz = Ii[i+1]- Ii[i]; 3841 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3842 } 3843 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3844 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3845 3846 if (!v) { 3847 ierr = PetscFree(values);CHKERRQ(ierr); 3848 } 3849 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3850 PetscFunctionReturn(0); 3851 } 3852 3853 #undef __FUNCT__ 3854 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3855 /*@ 3856 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3857 (the default parallel PETSc format). 3858 3859 Collective on MPI_Comm 3860 3861 Input Parameters: 3862 + B - the matrix 3863 . i - the indices into j for the start of each local row (starts with zero) 3864 . j - the column indices for each local row (starts with zero) 3865 - v - optional values in the matrix 3866 3867 Level: developer 3868 3869 Notes: 3870 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3871 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3872 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3873 3874 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3875 3876 The format which is used for the sparse matrix input, is equivalent to a 3877 row-major ordering.. i.e for the following matrix, the input data expected is 3878 as shown: 3879 3880 1 0 0 3881 2 0 3 P0 3882 ------- 3883 4 5 6 P1 3884 3885 Process0 [P0]: rows_owned=[0,1] 3886 i = {0,1,3} [size = nrow+1 = 2+1] 3887 j = {0,0,2} [size = nz = 6] 3888 v = {1,2,3} [size = nz = 6] 3889 3890 Process1 [P1]: rows_owned=[2] 3891 i = {0,3} [size = nrow+1 = 1+1] 3892 j = {0,1,2} [size = nz = 6] 3893 v = {4,5,6} [size = nz = 6] 3894 3895 .keywords: matrix, aij, compressed row, sparse, parallel 3896 3897 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3898 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3899 @*/ 3900 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3901 { 3902 PetscErrorCode ierr; 3903 3904 PetscFunctionBegin; 3905 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3906 PetscFunctionReturn(0); 3907 } 3908 3909 #undef __FUNCT__ 3910 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3911 /*@C 3912 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3913 (the default parallel PETSc format). For good matrix assembly performance 3914 the user should preallocate the matrix storage by setting the parameters 3915 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3916 performance can be increased by more than a factor of 50. 3917 3918 Collective on MPI_Comm 3919 3920 Input Parameters: 3921 + A - the matrix 3922 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3923 (same value is used for all local rows) 3924 . d_nnz - array containing the number of nonzeros in the various rows of the 3925 DIAGONAL portion of the local submatrix (possibly different for each row) 3926 or NULL, if d_nz is used to specify the nonzero structure. 3927 The size of this array is equal to the number of local rows, i.e 'm'. 3928 For matrices that will be factored, you must leave room for (and set) 3929 the diagonal entry even if it is zero. 3930 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3931 submatrix (same value is used for all local rows). 3932 - o_nnz - array containing the number of nonzeros in the various rows of the 3933 OFF-DIAGONAL portion of the local submatrix (possibly different for 3934 each row) or NULL, if o_nz is used to specify the nonzero 3935 structure. The size of this array is equal to the number 3936 of local rows, i.e 'm'. 3937 3938 If the *_nnz parameter is given then the *_nz parameter is ignored 3939 3940 The AIJ format (also called the Yale sparse matrix format or 3941 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3942 storage. The stored row and column indices begin with zero. 3943 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3944 3945 The parallel matrix is partitioned such that the first m0 rows belong to 3946 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3947 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3948 3949 The DIAGONAL portion of the local submatrix of a processor can be defined 3950 as the submatrix which is obtained by extraction the part corresponding to 3951 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3952 first row that belongs to the processor, r2 is the last row belonging to 3953 the this processor, and c1-c2 is range of indices of the local part of a 3954 vector suitable for applying the matrix to. This is an mxn matrix. In the 3955 common case of a square matrix, the row and column ranges are the same and 3956 the DIAGONAL part is also square. The remaining portion of the local 3957 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3958 3959 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3960 3961 You can call MatGetInfo() to get information on how effective the preallocation was; 3962 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3963 You can also run with the option -info and look for messages with the string 3964 malloc in them to see if additional memory allocation was needed. 3965 3966 Example usage: 3967 3968 Consider the following 8x8 matrix with 34 non-zero values, that is 3969 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3970 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3971 as follows: 3972 3973 .vb 3974 1 2 0 | 0 3 0 | 0 4 3975 Proc0 0 5 6 | 7 0 0 | 8 0 3976 9 0 10 | 11 0 0 | 12 0 3977 ------------------------------------- 3978 13 0 14 | 15 16 17 | 0 0 3979 Proc1 0 18 0 | 19 20 21 | 0 0 3980 0 0 0 | 22 23 0 | 24 0 3981 ------------------------------------- 3982 Proc2 25 26 27 | 0 0 28 | 29 0 3983 30 0 0 | 31 32 33 | 0 34 3984 .ve 3985 3986 This can be represented as a collection of submatrices as: 3987 3988 .vb 3989 A B C 3990 D E F 3991 G H I 3992 .ve 3993 3994 Where the submatrices A,B,C are owned by proc0, D,E,F are 3995 owned by proc1, G,H,I are owned by proc2. 3996 3997 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3998 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3999 The 'M','N' parameters are 8,8, and have the same values on all procs. 4000 4001 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4002 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4003 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4004 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4005 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4006 matrix, ans [DF] as another SeqAIJ matrix. 4007 4008 When d_nz, o_nz parameters are specified, d_nz storage elements are 4009 allocated for every row of the local diagonal submatrix, and o_nz 4010 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4011 One way to choose d_nz and o_nz is to use the max nonzerors per local 4012 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4013 In this case, the values of d_nz,o_nz are: 4014 .vb 4015 proc0 : dnz = 2, o_nz = 2 4016 proc1 : dnz = 3, o_nz = 2 4017 proc2 : dnz = 1, o_nz = 4 4018 .ve 4019 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4020 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4021 for proc3. i.e we are using 12+15+10=37 storage locations to store 4022 34 values. 4023 4024 When d_nnz, o_nnz parameters are specified, the storage is specified 4025 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4026 In the above case the values for d_nnz,o_nnz are: 4027 .vb 4028 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4029 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4030 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4031 .ve 4032 Here the space allocated is sum of all the above values i.e 34, and 4033 hence pre-allocation is perfect. 4034 4035 Level: intermediate 4036 4037 .keywords: matrix, aij, compressed row, sparse, parallel 4038 4039 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4040 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4041 @*/ 4042 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4043 { 4044 PetscErrorCode ierr; 4045 4046 PetscFunctionBegin; 4047 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4048 PetscValidType(B,1); 4049 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4050 PetscFunctionReturn(0); 4051 } 4052 4053 #undef __FUNCT__ 4054 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4055 /*@ 4056 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4057 CSR format the local rows. 4058 4059 Collective on MPI_Comm 4060 4061 Input Parameters: 4062 + comm - MPI communicator 4063 . m - number of local rows (Cannot be PETSC_DECIDE) 4064 . n - This value should be the same as the local size used in creating the 4065 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4066 calculated if N is given) For square matrices n is almost always m. 4067 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4068 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4069 . i - row indices 4070 . j - column indices 4071 - a - matrix values 4072 4073 Output Parameter: 4074 . mat - the matrix 4075 4076 Level: intermediate 4077 4078 Notes: 4079 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4080 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4081 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4082 4083 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4084 4085 The format which is used for the sparse matrix input, is equivalent to a 4086 row-major ordering.. i.e for the following matrix, the input data expected is 4087 as shown: 4088 4089 1 0 0 4090 2 0 3 P0 4091 ------- 4092 4 5 6 P1 4093 4094 Process0 [P0]: rows_owned=[0,1] 4095 i = {0,1,3} [size = nrow+1 = 2+1] 4096 j = {0,0,2} [size = nz = 6] 4097 v = {1,2,3} [size = nz = 6] 4098 4099 Process1 [P1]: rows_owned=[2] 4100 i = {0,3} [size = nrow+1 = 1+1] 4101 j = {0,1,2} [size = nz = 6] 4102 v = {4,5,6} [size = nz = 6] 4103 4104 .keywords: matrix, aij, compressed row, sparse, parallel 4105 4106 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4107 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4108 @*/ 4109 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4110 { 4111 PetscErrorCode ierr; 4112 4113 PetscFunctionBegin; 4114 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4115 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4116 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4117 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4118 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4119 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4120 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4121 PetscFunctionReturn(0); 4122 } 4123 4124 #undef __FUNCT__ 4125 #define __FUNCT__ "MatCreateAIJ" 4126 /*@C 4127 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4128 (the default parallel PETSc format). For good matrix assembly performance 4129 the user should preallocate the matrix storage by setting the parameters 4130 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4131 performance can be increased by more than a factor of 50. 4132 4133 Collective on MPI_Comm 4134 4135 Input Parameters: 4136 + comm - MPI communicator 4137 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4138 This value should be the same as the local size used in creating the 4139 y vector for the matrix-vector product y = Ax. 4140 . n - This value should be the same as the local size used in creating the 4141 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4142 calculated if N is given) For square matrices n is almost always m. 4143 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4144 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4145 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4146 (same value is used for all local rows) 4147 . d_nnz - array containing the number of nonzeros in the various rows of the 4148 DIAGONAL portion of the local submatrix (possibly different for each row) 4149 or NULL, if d_nz is used to specify the nonzero structure. 4150 The size of this array is equal to the number of local rows, i.e 'm'. 4151 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4152 submatrix (same value is used for all local rows). 4153 - o_nnz - array containing the number of nonzeros in the various rows of the 4154 OFF-DIAGONAL portion of the local submatrix (possibly different for 4155 each row) or NULL, if o_nz is used to specify the nonzero 4156 structure. The size of this array is equal to the number 4157 of local rows, i.e 'm'. 4158 4159 Output Parameter: 4160 . A - the matrix 4161 4162 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4163 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4164 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4165 4166 Notes: 4167 If the *_nnz parameter is given then the *_nz parameter is ignored 4168 4169 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4170 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4171 storage requirements for this matrix. 4172 4173 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4174 processor than it must be used on all processors that share the object for 4175 that argument. 4176 4177 The user MUST specify either the local or global matrix dimensions 4178 (possibly both). 4179 4180 The parallel matrix is partitioned across processors such that the 4181 first m0 rows belong to process 0, the next m1 rows belong to 4182 process 1, the next m2 rows belong to process 2 etc.. where 4183 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4184 values corresponding to [m x N] submatrix. 4185 4186 The columns are logically partitioned with the n0 columns belonging 4187 to 0th partition, the next n1 columns belonging to the next 4188 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4189 4190 The DIAGONAL portion of the local submatrix on any given processor 4191 is the submatrix corresponding to the rows and columns m,n 4192 corresponding to the given processor. i.e diagonal matrix on 4193 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4194 etc. The remaining portion of the local submatrix [m x (N-n)] 4195 constitute the OFF-DIAGONAL portion. The example below better 4196 illustrates this concept. 4197 4198 For a square global matrix we define each processor's diagonal portion 4199 to be its local rows and the corresponding columns (a square submatrix); 4200 each processor's off-diagonal portion encompasses the remainder of the 4201 local matrix (a rectangular submatrix). 4202 4203 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4204 4205 When calling this routine with a single process communicator, a matrix of 4206 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4207 type of communicator, use the construction mechanism: 4208 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4209 4210 By default, this format uses inodes (identical nodes) when possible. 4211 We search for consecutive rows with the same nonzero structure, thereby 4212 reusing matrix information to achieve increased efficiency. 4213 4214 Options Database Keys: 4215 + -mat_no_inode - Do not use inodes 4216 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4217 - -mat_aij_oneindex - Internally use indexing starting at 1 4218 rather than 0. Note that when calling MatSetValues(), 4219 the user still MUST index entries starting at 0! 4220 4221 4222 Example usage: 4223 4224 Consider the following 8x8 matrix with 34 non-zero values, that is 4225 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4226 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4227 as follows: 4228 4229 .vb 4230 1 2 0 | 0 3 0 | 0 4 4231 Proc0 0 5 6 | 7 0 0 | 8 0 4232 9 0 10 | 11 0 0 | 12 0 4233 ------------------------------------- 4234 13 0 14 | 15 16 17 | 0 0 4235 Proc1 0 18 0 | 19 20 21 | 0 0 4236 0 0 0 | 22 23 0 | 24 0 4237 ------------------------------------- 4238 Proc2 25 26 27 | 0 0 28 | 29 0 4239 30 0 0 | 31 32 33 | 0 34 4240 .ve 4241 4242 This can be represented as a collection of submatrices as: 4243 4244 .vb 4245 A B C 4246 D E F 4247 G H I 4248 .ve 4249 4250 Where the submatrices A,B,C are owned by proc0, D,E,F are 4251 owned by proc1, G,H,I are owned by proc2. 4252 4253 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4254 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4255 The 'M','N' parameters are 8,8, and have the same values on all procs. 4256 4257 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4258 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4259 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4260 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4261 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4262 matrix, ans [DF] as another SeqAIJ matrix. 4263 4264 When d_nz, o_nz parameters are specified, d_nz storage elements are 4265 allocated for every row of the local diagonal submatrix, and o_nz 4266 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4267 One way to choose d_nz and o_nz is to use the max nonzerors per local 4268 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4269 In this case, the values of d_nz,o_nz are: 4270 .vb 4271 proc0 : dnz = 2, o_nz = 2 4272 proc1 : dnz = 3, o_nz = 2 4273 proc2 : dnz = 1, o_nz = 4 4274 .ve 4275 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4276 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4277 for proc3. i.e we are using 12+15+10=37 storage locations to store 4278 34 values. 4279 4280 When d_nnz, o_nnz parameters are specified, the storage is specified 4281 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4282 In the above case the values for d_nnz,o_nnz are: 4283 .vb 4284 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4285 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4286 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4287 .ve 4288 Here the space allocated is sum of all the above values i.e 34, and 4289 hence pre-allocation is perfect. 4290 4291 Level: intermediate 4292 4293 .keywords: matrix, aij, compressed row, sparse, parallel 4294 4295 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4296 MPIAIJ, MatCreateMPIAIJWithArrays() 4297 @*/ 4298 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4299 { 4300 PetscErrorCode ierr; 4301 PetscMPIInt size; 4302 4303 PetscFunctionBegin; 4304 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4305 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4306 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4307 if (size > 1) { 4308 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4309 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4310 } else { 4311 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4312 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4313 } 4314 PetscFunctionReturn(0); 4315 } 4316 4317 #undef __FUNCT__ 4318 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4319 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4320 { 4321 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4322 4323 PetscFunctionBegin; 4324 *Ad = a->A; 4325 *Ao = a->B; 4326 *colmap = a->garray; 4327 PetscFunctionReturn(0); 4328 } 4329 4330 #undef __FUNCT__ 4331 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4332 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4333 { 4334 PetscErrorCode ierr; 4335 PetscInt i; 4336 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4337 4338 PetscFunctionBegin; 4339 if (coloring->ctype == IS_COLORING_GLOBAL) { 4340 ISColoringValue *allcolors,*colors; 4341 ISColoring ocoloring; 4342 4343 /* set coloring for diagonal portion */ 4344 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4345 4346 /* set coloring for off-diagonal portion */ 4347 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4348 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4349 for (i=0; i<a->B->cmap->n; i++) { 4350 colors[i] = allcolors[a->garray[i]]; 4351 } 4352 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4353 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4354 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4355 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4356 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4357 ISColoringValue *colors; 4358 PetscInt *larray; 4359 ISColoring ocoloring; 4360 4361 /* set coloring for diagonal portion */ 4362 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4363 for (i=0; i<a->A->cmap->n; i++) { 4364 larray[i] = i + A->cmap->rstart; 4365 } 4366 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4367 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4368 for (i=0; i<a->A->cmap->n; i++) { 4369 colors[i] = coloring->colors[larray[i]]; 4370 } 4371 ierr = PetscFree(larray);CHKERRQ(ierr); 4372 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4373 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4374 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4375 4376 /* set coloring for off-diagonal portion */ 4377 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4378 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4379 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4380 for (i=0; i<a->B->cmap->n; i++) { 4381 colors[i] = coloring->colors[larray[i]]; 4382 } 4383 ierr = PetscFree(larray);CHKERRQ(ierr); 4384 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4385 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4386 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4387 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4388 PetscFunctionReturn(0); 4389 } 4390 4391 #undef __FUNCT__ 4392 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4393 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4394 { 4395 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4396 PetscErrorCode ierr; 4397 4398 PetscFunctionBegin; 4399 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4400 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4401 PetscFunctionReturn(0); 4402 } 4403 4404 #undef __FUNCT__ 4405 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4406 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4407 { 4408 PetscErrorCode ierr; 4409 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4410 PetscInt *indx; 4411 4412 PetscFunctionBegin; 4413 /* This routine will ONLY return MPIAIJ type matrix */ 4414 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4415 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4416 if (n == PETSC_DECIDE) { 4417 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4418 } 4419 /* Check sum(n) = N */ 4420 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4421 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4422 4423 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4424 rstart -= m; 4425 4426 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4427 for (i=0; i<m; i++) { 4428 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4429 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4430 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4431 } 4432 4433 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4434 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4435 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4436 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4437 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4438 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4439 PetscFunctionReturn(0); 4440 } 4441 4442 #undef __FUNCT__ 4443 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4444 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4445 { 4446 PetscErrorCode ierr; 4447 PetscInt m,N,i,rstart,nnz,Ii; 4448 PetscInt *indx; 4449 PetscScalar *values; 4450 4451 PetscFunctionBegin; 4452 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4453 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4454 for (i=0; i<m; i++) { 4455 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4456 Ii = i + rstart; 4457 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4458 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4459 } 4460 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4461 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4462 PetscFunctionReturn(0); 4463 } 4464 4465 #undef __FUNCT__ 4466 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4467 /*@ 4468 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4469 matrices from each processor 4470 4471 Collective on MPI_Comm 4472 4473 Input Parameters: 4474 + comm - the communicators the parallel matrix will live on 4475 . inmat - the input sequential matrices 4476 . n - number of local columns (or PETSC_DECIDE) 4477 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4478 4479 Output Parameter: 4480 . outmat - the parallel matrix generated 4481 4482 Level: advanced 4483 4484 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4485 4486 @*/ 4487 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4488 { 4489 PetscErrorCode ierr; 4490 PetscMPIInt size; 4491 4492 PetscFunctionBegin; 4493 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4494 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4495 if (size == 1) { 4496 if (scall == MAT_INITIAL_MATRIX) { 4497 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4498 } else { 4499 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4500 } 4501 } else { 4502 if (scall == MAT_INITIAL_MATRIX) { 4503 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4504 } 4505 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4506 } 4507 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4508 PetscFunctionReturn(0); 4509 } 4510 4511 #undef __FUNCT__ 4512 #define __FUNCT__ "MatFileSplit" 4513 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4514 { 4515 PetscErrorCode ierr; 4516 PetscMPIInt rank; 4517 PetscInt m,N,i,rstart,nnz; 4518 size_t len; 4519 const PetscInt *indx; 4520 PetscViewer out; 4521 char *name; 4522 Mat B; 4523 const PetscScalar *values; 4524 4525 PetscFunctionBegin; 4526 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4527 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4528 /* Should this be the type of the diagonal block of A? */ 4529 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4530 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4531 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4532 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4533 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4534 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4535 for (i=0; i<m; i++) { 4536 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4537 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4538 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4539 } 4540 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4541 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4542 4543 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4544 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4545 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4546 sprintf(name,"%s.%d",outfile,rank); 4547 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4548 ierr = PetscFree(name);CHKERRQ(ierr); 4549 ierr = MatView(B,out);CHKERRQ(ierr); 4550 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4551 ierr = MatDestroy(&B);CHKERRQ(ierr); 4552 PetscFunctionReturn(0); 4553 } 4554 4555 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4556 #undef __FUNCT__ 4557 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4558 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4559 { 4560 PetscErrorCode ierr; 4561 Mat_Merge_SeqsToMPI *merge; 4562 PetscContainer container; 4563 4564 PetscFunctionBegin; 4565 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4566 if (container) { 4567 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4568 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4569 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4570 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4571 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4572 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4573 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4574 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4575 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4576 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4577 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4578 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4579 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4580 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4581 ierr = PetscFree(merge);CHKERRQ(ierr); 4582 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4583 } 4584 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4585 PetscFunctionReturn(0); 4586 } 4587 4588 #include <../src/mat/utils/freespace.h> 4589 #include <petscbt.h> 4590 4591 #undef __FUNCT__ 4592 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4593 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4594 { 4595 PetscErrorCode ierr; 4596 MPI_Comm comm; 4597 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4598 PetscMPIInt size,rank,taga,*len_s; 4599 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4600 PetscInt proc,m; 4601 PetscInt **buf_ri,**buf_rj; 4602 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4603 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4604 MPI_Request *s_waits,*r_waits; 4605 MPI_Status *status; 4606 MatScalar *aa=a->a; 4607 MatScalar **abuf_r,*ba_i; 4608 Mat_Merge_SeqsToMPI *merge; 4609 PetscContainer container; 4610 4611 PetscFunctionBegin; 4612 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4613 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4614 4615 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4616 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4617 4618 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4619 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4620 4621 bi = merge->bi; 4622 bj = merge->bj; 4623 buf_ri = merge->buf_ri; 4624 buf_rj = merge->buf_rj; 4625 4626 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4627 owners = merge->rowmap->range; 4628 len_s = merge->len_s; 4629 4630 /* send and recv matrix values */ 4631 /*-----------------------------*/ 4632 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4633 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4634 4635 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4636 for (proc=0,k=0; proc<size; proc++) { 4637 if (!len_s[proc]) continue; 4638 i = owners[proc]; 4639 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4640 k++; 4641 } 4642 4643 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4644 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4645 ierr = PetscFree(status);CHKERRQ(ierr); 4646 4647 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4648 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4649 4650 /* insert mat values of mpimat */ 4651 /*----------------------------*/ 4652 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4653 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4654 4655 for (k=0; k<merge->nrecv; k++) { 4656 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4657 nrows = *(buf_ri_k[k]); 4658 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4659 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4660 } 4661 4662 /* set values of ba */ 4663 m = merge->rowmap->n; 4664 for (i=0; i<m; i++) { 4665 arow = owners[rank] + i; 4666 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4667 bnzi = bi[i+1] - bi[i]; 4668 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4669 4670 /* add local non-zero vals of this proc's seqmat into ba */ 4671 anzi = ai[arow+1] - ai[arow]; 4672 aj = a->j + ai[arow]; 4673 aa = a->a + ai[arow]; 4674 nextaj = 0; 4675 for (j=0; nextaj<anzi; j++) { 4676 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4677 ba_i[j] += aa[nextaj++]; 4678 } 4679 } 4680 4681 /* add received vals into ba */ 4682 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4683 /* i-th row */ 4684 if (i == *nextrow[k]) { 4685 anzi = *(nextai[k]+1) - *nextai[k]; 4686 aj = buf_rj[k] + *(nextai[k]); 4687 aa = abuf_r[k] + *(nextai[k]); 4688 nextaj = 0; 4689 for (j=0; nextaj<anzi; j++) { 4690 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4691 ba_i[j] += aa[nextaj++]; 4692 } 4693 } 4694 nextrow[k]++; nextai[k]++; 4695 } 4696 } 4697 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4698 } 4699 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4700 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4701 4702 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4703 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4704 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4705 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4706 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4707 PetscFunctionReturn(0); 4708 } 4709 4710 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4711 4712 #undef __FUNCT__ 4713 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4714 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4715 { 4716 PetscErrorCode ierr; 4717 Mat B_mpi; 4718 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4719 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4720 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4721 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4722 PetscInt len,proc,*dnz,*onz,bs,cbs; 4723 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4724 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4725 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4726 MPI_Status *status; 4727 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4728 PetscBT lnkbt; 4729 Mat_Merge_SeqsToMPI *merge; 4730 PetscContainer container; 4731 4732 PetscFunctionBegin; 4733 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4734 4735 /* make sure it is a PETSc comm */ 4736 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4737 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4738 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4739 4740 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4741 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4742 4743 /* determine row ownership */ 4744 /*---------------------------------------------------------*/ 4745 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4746 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4747 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4748 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4749 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4750 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4751 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4752 4753 m = merge->rowmap->n; 4754 owners = merge->rowmap->range; 4755 4756 /* determine the number of messages to send, their lengths */ 4757 /*---------------------------------------------------------*/ 4758 len_s = merge->len_s; 4759 4760 len = 0; /* length of buf_si[] */ 4761 merge->nsend = 0; 4762 for (proc=0; proc<size; proc++) { 4763 len_si[proc] = 0; 4764 if (proc == rank) { 4765 len_s[proc] = 0; 4766 } else { 4767 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4768 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4769 } 4770 if (len_s[proc]) { 4771 merge->nsend++; 4772 nrows = 0; 4773 for (i=owners[proc]; i<owners[proc+1]; i++) { 4774 if (ai[i+1] > ai[i]) nrows++; 4775 } 4776 len_si[proc] = 2*(nrows+1); 4777 len += len_si[proc]; 4778 } 4779 } 4780 4781 /* determine the number and length of messages to receive for ij-structure */ 4782 /*-------------------------------------------------------------------------*/ 4783 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4784 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4785 4786 /* post the Irecv of j-structure */ 4787 /*-------------------------------*/ 4788 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4789 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4790 4791 /* post the Isend of j-structure */ 4792 /*--------------------------------*/ 4793 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4794 4795 for (proc=0, k=0; proc<size; proc++) { 4796 if (!len_s[proc]) continue; 4797 i = owners[proc]; 4798 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4799 k++; 4800 } 4801 4802 /* receives and sends of j-structure are complete */ 4803 /*------------------------------------------------*/ 4804 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4805 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4806 4807 /* send and recv i-structure */ 4808 /*---------------------------*/ 4809 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4810 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4811 4812 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4813 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4814 for (proc=0,k=0; proc<size; proc++) { 4815 if (!len_s[proc]) continue; 4816 /* form outgoing message for i-structure: 4817 buf_si[0]: nrows to be sent 4818 [1:nrows]: row index (global) 4819 [nrows+1:2*nrows+1]: i-structure index 4820 */ 4821 /*-------------------------------------------*/ 4822 nrows = len_si[proc]/2 - 1; 4823 buf_si_i = buf_si + nrows+1; 4824 buf_si[0] = nrows; 4825 buf_si_i[0] = 0; 4826 nrows = 0; 4827 for (i=owners[proc]; i<owners[proc+1]; i++) { 4828 anzi = ai[i+1] - ai[i]; 4829 if (anzi) { 4830 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4831 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4832 nrows++; 4833 } 4834 } 4835 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4836 k++; 4837 buf_si += len_si[proc]; 4838 } 4839 4840 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4841 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4842 4843 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4844 for (i=0; i<merge->nrecv; i++) { 4845 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4846 } 4847 4848 ierr = PetscFree(len_si);CHKERRQ(ierr); 4849 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4850 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4851 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4852 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4853 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4854 ierr = PetscFree(status);CHKERRQ(ierr); 4855 4856 /* compute a local seq matrix in each processor */ 4857 /*----------------------------------------------*/ 4858 /* allocate bi array and free space for accumulating nonzero column info */ 4859 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4860 bi[0] = 0; 4861 4862 /* create and initialize a linked list */ 4863 nlnk = N+1; 4864 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4865 4866 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4867 len = ai[owners[rank+1]] - ai[owners[rank]]; 4868 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4869 4870 current_space = free_space; 4871 4872 /* determine symbolic info for each local row */ 4873 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4874 4875 for (k=0; k<merge->nrecv; k++) { 4876 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4877 nrows = *buf_ri_k[k]; 4878 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4879 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4880 } 4881 4882 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4883 len = 0; 4884 for (i=0; i<m; i++) { 4885 bnzi = 0; 4886 /* add local non-zero cols of this proc's seqmat into lnk */ 4887 arow = owners[rank] + i; 4888 anzi = ai[arow+1] - ai[arow]; 4889 aj = a->j + ai[arow]; 4890 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4891 bnzi += nlnk; 4892 /* add received col data into lnk */ 4893 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4894 if (i == *nextrow[k]) { /* i-th row */ 4895 anzi = *(nextai[k]+1) - *nextai[k]; 4896 aj = buf_rj[k] + *nextai[k]; 4897 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4898 bnzi += nlnk; 4899 nextrow[k]++; nextai[k]++; 4900 } 4901 } 4902 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4903 4904 /* if free space is not available, make more free space */ 4905 if (current_space->local_remaining<bnzi) { 4906 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4907 nspacedouble++; 4908 } 4909 /* copy data into free space, then initialize lnk */ 4910 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4911 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4912 4913 current_space->array += bnzi; 4914 current_space->local_used += bnzi; 4915 current_space->local_remaining -= bnzi; 4916 4917 bi[i+1] = bi[i] + bnzi; 4918 } 4919 4920 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4921 4922 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4923 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4924 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4925 4926 /* create symbolic parallel matrix B_mpi */ 4927 /*---------------------------------------*/ 4928 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4929 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4930 if (n==PETSC_DECIDE) { 4931 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4932 } else { 4933 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4934 } 4935 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4936 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4937 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4938 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4939 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4940 4941 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4942 B_mpi->assembled = PETSC_FALSE; 4943 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4944 merge->bi = bi; 4945 merge->bj = bj; 4946 merge->buf_ri = buf_ri; 4947 merge->buf_rj = buf_rj; 4948 merge->coi = NULL; 4949 merge->coj = NULL; 4950 merge->owners_co = NULL; 4951 4952 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4953 4954 /* attach the supporting struct to B_mpi for reuse */ 4955 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4956 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4957 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4958 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4959 *mpimat = B_mpi; 4960 4961 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4962 PetscFunctionReturn(0); 4963 } 4964 4965 #undef __FUNCT__ 4966 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4967 /*@C 4968 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4969 matrices from each processor 4970 4971 Collective on MPI_Comm 4972 4973 Input Parameters: 4974 + comm - the communicators the parallel matrix will live on 4975 . seqmat - the input sequential matrices 4976 . m - number of local rows (or PETSC_DECIDE) 4977 . n - number of local columns (or PETSC_DECIDE) 4978 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4979 4980 Output Parameter: 4981 . mpimat - the parallel matrix generated 4982 4983 Level: advanced 4984 4985 Notes: 4986 The dimensions of the sequential matrix in each processor MUST be the same. 4987 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4988 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4989 @*/ 4990 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4991 { 4992 PetscErrorCode ierr; 4993 PetscMPIInt size; 4994 4995 PetscFunctionBegin; 4996 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4997 if (size == 1) { 4998 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4999 if (scall == MAT_INITIAL_MATRIX) { 5000 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5001 } else { 5002 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5003 } 5004 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5005 PetscFunctionReturn(0); 5006 } 5007 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5008 if (scall == MAT_INITIAL_MATRIX) { 5009 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5010 } 5011 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5012 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5013 PetscFunctionReturn(0); 5014 } 5015 5016 #undef __FUNCT__ 5017 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5018 /*@ 5019 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 5020 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5021 with MatGetSize() 5022 5023 Not Collective 5024 5025 Input Parameters: 5026 + A - the matrix 5027 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5028 5029 Output Parameter: 5030 . A_loc - the local sequential matrix generated 5031 5032 Level: developer 5033 5034 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5035 5036 @*/ 5037 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5038 { 5039 PetscErrorCode ierr; 5040 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5041 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5042 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5043 MatScalar *aa=a->a,*ba=b->a,*cam; 5044 PetscScalar *ca; 5045 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5046 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5047 PetscBool match; 5048 5049 PetscFunctionBegin; 5050 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5051 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5052 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5053 if (scall == MAT_INITIAL_MATRIX) { 5054 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5055 ci[0] = 0; 5056 for (i=0; i<am; i++) { 5057 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5058 } 5059 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5060 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5061 k = 0; 5062 for (i=0; i<am; i++) { 5063 ncols_o = bi[i+1] - bi[i]; 5064 ncols_d = ai[i+1] - ai[i]; 5065 /* off-diagonal portion of A */ 5066 for (jo=0; jo<ncols_o; jo++) { 5067 col = cmap[*bj]; 5068 if (col >= cstart) break; 5069 cj[k] = col; bj++; 5070 ca[k++] = *ba++; 5071 } 5072 /* diagonal portion of A */ 5073 for (j=0; j<ncols_d; j++) { 5074 cj[k] = cstart + *aj++; 5075 ca[k++] = *aa++; 5076 } 5077 /* off-diagonal portion of A */ 5078 for (j=jo; j<ncols_o; j++) { 5079 cj[k] = cmap[*bj++]; 5080 ca[k++] = *ba++; 5081 } 5082 } 5083 /* put together the new matrix */ 5084 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5085 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5086 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5087 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5088 mat->free_a = PETSC_TRUE; 5089 mat->free_ij = PETSC_TRUE; 5090 mat->nonew = 0; 5091 } else if (scall == MAT_REUSE_MATRIX) { 5092 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5093 ci = mat->i; cj = mat->j; cam = mat->a; 5094 for (i=0; i<am; i++) { 5095 /* off-diagonal portion of A */ 5096 ncols_o = bi[i+1] - bi[i]; 5097 for (jo=0; jo<ncols_o; jo++) { 5098 col = cmap[*bj]; 5099 if (col >= cstart) break; 5100 *cam++ = *ba++; bj++; 5101 } 5102 /* diagonal portion of A */ 5103 ncols_d = ai[i+1] - ai[i]; 5104 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5105 /* off-diagonal portion of A */ 5106 for (j=jo; j<ncols_o; j++) { 5107 *cam++ = *ba++; bj++; 5108 } 5109 } 5110 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5111 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5112 PetscFunctionReturn(0); 5113 } 5114 5115 #undef __FUNCT__ 5116 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5117 /*@C 5118 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5119 5120 Not Collective 5121 5122 Input Parameters: 5123 + A - the matrix 5124 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5125 - row, col - index sets of rows and columns to extract (or NULL) 5126 5127 Output Parameter: 5128 . A_loc - the local sequential matrix generated 5129 5130 Level: developer 5131 5132 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5133 5134 @*/ 5135 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5136 { 5137 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5138 PetscErrorCode ierr; 5139 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5140 IS isrowa,iscola; 5141 Mat *aloc; 5142 PetscBool match; 5143 5144 PetscFunctionBegin; 5145 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5146 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5147 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5148 if (!row) { 5149 start = A->rmap->rstart; end = A->rmap->rend; 5150 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5151 } else { 5152 isrowa = *row; 5153 } 5154 if (!col) { 5155 start = A->cmap->rstart; 5156 cmap = a->garray; 5157 nzA = a->A->cmap->n; 5158 nzB = a->B->cmap->n; 5159 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5160 ncols = 0; 5161 for (i=0; i<nzB; i++) { 5162 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5163 else break; 5164 } 5165 imark = i; 5166 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5167 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5168 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5169 } else { 5170 iscola = *col; 5171 } 5172 if (scall != MAT_INITIAL_MATRIX) { 5173 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5174 aloc[0] = *A_loc; 5175 } 5176 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5177 *A_loc = aloc[0]; 5178 ierr = PetscFree(aloc);CHKERRQ(ierr); 5179 if (!row) { 5180 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5181 } 5182 if (!col) { 5183 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5184 } 5185 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5186 PetscFunctionReturn(0); 5187 } 5188 5189 #undef __FUNCT__ 5190 #define __FUNCT__ "MatGetBrowsOfAcols" 5191 /*@C 5192 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5193 5194 Collective on Mat 5195 5196 Input Parameters: 5197 + A,B - the matrices in mpiaij format 5198 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5199 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5200 5201 Output Parameter: 5202 + rowb, colb - index sets of rows and columns of B to extract 5203 - B_seq - the sequential matrix generated 5204 5205 Level: developer 5206 5207 @*/ 5208 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5209 { 5210 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5211 PetscErrorCode ierr; 5212 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5213 IS isrowb,iscolb; 5214 Mat *bseq=NULL; 5215 5216 PetscFunctionBegin; 5217 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5218 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5219 } 5220 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5221 5222 if (scall == MAT_INITIAL_MATRIX) { 5223 start = A->cmap->rstart; 5224 cmap = a->garray; 5225 nzA = a->A->cmap->n; 5226 nzB = a->B->cmap->n; 5227 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5228 ncols = 0; 5229 for (i=0; i<nzB; i++) { /* row < local row index */ 5230 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5231 else break; 5232 } 5233 imark = i; 5234 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5235 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5236 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5237 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5238 } else { 5239 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5240 isrowb = *rowb; iscolb = *colb; 5241 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5242 bseq[0] = *B_seq; 5243 } 5244 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5245 *B_seq = bseq[0]; 5246 ierr = PetscFree(bseq);CHKERRQ(ierr); 5247 if (!rowb) { 5248 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5249 } else { 5250 *rowb = isrowb; 5251 } 5252 if (!colb) { 5253 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5254 } else { 5255 *colb = iscolb; 5256 } 5257 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5258 PetscFunctionReturn(0); 5259 } 5260 5261 #undef __FUNCT__ 5262 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5263 /* 5264 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5265 of the OFF-DIAGONAL portion of local A 5266 5267 Collective on Mat 5268 5269 Input Parameters: 5270 + A,B - the matrices in mpiaij format 5271 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5272 5273 Output Parameter: 5274 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5275 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5276 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5277 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5278 5279 Level: developer 5280 5281 */ 5282 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5283 { 5284 VecScatter_MPI_General *gen_to,*gen_from; 5285 PetscErrorCode ierr; 5286 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5287 Mat_SeqAIJ *b_oth; 5288 VecScatter ctx =a->Mvctx; 5289 MPI_Comm comm; 5290 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5291 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5292 PetscScalar *rvalues,*svalues; 5293 MatScalar *b_otha,*bufa,*bufA; 5294 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5295 MPI_Request *rwaits = NULL,*swaits = NULL; 5296 MPI_Status *sstatus,rstatus; 5297 PetscMPIInt jj; 5298 PetscInt *cols,sbs,rbs; 5299 PetscScalar *vals; 5300 5301 PetscFunctionBegin; 5302 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5303 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5304 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5305 } 5306 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5307 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5308 5309 gen_to = (VecScatter_MPI_General*)ctx->todata; 5310 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5311 rvalues = gen_from->values; /* holds the length of receiving row */ 5312 svalues = gen_to->values; /* holds the length of sending row */ 5313 nrecvs = gen_from->n; 5314 nsends = gen_to->n; 5315 5316 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5317 srow = gen_to->indices; /* local row index to be sent */ 5318 sstarts = gen_to->starts; 5319 sprocs = gen_to->procs; 5320 sstatus = gen_to->sstatus; 5321 sbs = gen_to->bs; 5322 rstarts = gen_from->starts; 5323 rprocs = gen_from->procs; 5324 rbs = gen_from->bs; 5325 5326 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5327 if (scall == MAT_INITIAL_MATRIX) { 5328 /* i-array */ 5329 /*---------*/ 5330 /* post receives */ 5331 for (i=0; i<nrecvs; i++) { 5332 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5333 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5334 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5335 } 5336 5337 /* pack the outgoing message */ 5338 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5339 5340 sstartsj[0] = 0; 5341 rstartsj[0] = 0; 5342 len = 0; /* total length of j or a array to be sent */ 5343 k = 0; 5344 for (i=0; i<nsends; i++) { 5345 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5346 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5347 for (j=0; j<nrows; j++) { 5348 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5349 for (l=0; l<sbs; l++) { 5350 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5351 5352 rowlen[j*sbs+l] = ncols; 5353 5354 len += ncols; 5355 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5356 } 5357 k++; 5358 } 5359 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5360 5361 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5362 } 5363 /* recvs and sends of i-array are completed */ 5364 i = nrecvs; 5365 while (i--) { 5366 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5367 } 5368 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5369 5370 /* allocate buffers for sending j and a arrays */ 5371 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5372 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5373 5374 /* create i-array of B_oth */ 5375 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5376 5377 b_othi[0] = 0; 5378 len = 0; /* total length of j or a array to be received */ 5379 k = 0; 5380 for (i=0; i<nrecvs; i++) { 5381 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5382 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5383 for (j=0; j<nrows; j++) { 5384 b_othi[k+1] = b_othi[k] + rowlen[j]; 5385 len += rowlen[j]; k++; 5386 } 5387 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5388 } 5389 5390 /* allocate space for j and a arrrays of B_oth */ 5391 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5392 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5393 5394 /* j-array */ 5395 /*---------*/ 5396 /* post receives of j-array */ 5397 for (i=0; i<nrecvs; i++) { 5398 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5399 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5400 } 5401 5402 /* pack the outgoing message j-array */ 5403 k = 0; 5404 for (i=0; i<nsends; i++) { 5405 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5406 bufJ = bufj+sstartsj[i]; 5407 for (j=0; j<nrows; j++) { 5408 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5409 for (ll=0; ll<sbs; ll++) { 5410 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5411 for (l=0; l<ncols; l++) { 5412 *bufJ++ = cols[l]; 5413 } 5414 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5415 } 5416 } 5417 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5418 } 5419 5420 /* recvs and sends of j-array are completed */ 5421 i = nrecvs; 5422 while (i--) { 5423 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5424 } 5425 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5426 } else if (scall == MAT_REUSE_MATRIX) { 5427 sstartsj = *startsj_s; 5428 rstartsj = *startsj_r; 5429 bufa = *bufa_ptr; 5430 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5431 b_otha = b_oth->a; 5432 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5433 5434 /* a-array */ 5435 /*---------*/ 5436 /* post receives of a-array */ 5437 for (i=0; i<nrecvs; i++) { 5438 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5439 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5440 } 5441 5442 /* pack the outgoing message a-array */ 5443 k = 0; 5444 for (i=0; i<nsends; i++) { 5445 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5446 bufA = bufa+sstartsj[i]; 5447 for (j=0; j<nrows; j++) { 5448 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5449 for (ll=0; ll<sbs; ll++) { 5450 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5451 for (l=0; l<ncols; l++) { 5452 *bufA++ = vals[l]; 5453 } 5454 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5455 } 5456 } 5457 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5458 } 5459 /* recvs and sends of a-array are completed */ 5460 i = nrecvs; 5461 while (i--) { 5462 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5463 } 5464 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5465 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5466 5467 if (scall == MAT_INITIAL_MATRIX) { 5468 /* put together the new matrix */ 5469 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5470 5471 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5472 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5473 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5474 b_oth->free_a = PETSC_TRUE; 5475 b_oth->free_ij = PETSC_TRUE; 5476 b_oth->nonew = 0; 5477 5478 ierr = PetscFree(bufj);CHKERRQ(ierr); 5479 if (!startsj_s || !bufa_ptr) { 5480 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5481 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5482 } else { 5483 *startsj_s = sstartsj; 5484 *startsj_r = rstartsj; 5485 *bufa_ptr = bufa; 5486 } 5487 } 5488 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5489 PetscFunctionReturn(0); 5490 } 5491 5492 #undef __FUNCT__ 5493 #define __FUNCT__ "MatGetCommunicationStructs" 5494 /*@C 5495 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5496 5497 Not Collective 5498 5499 Input Parameters: 5500 . A - The matrix in mpiaij format 5501 5502 Output Parameter: 5503 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5504 . colmap - A map from global column index to local index into lvec 5505 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5506 5507 Level: developer 5508 5509 @*/ 5510 #if defined(PETSC_USE_CTABLE) 5511 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5512 #else 5513 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5514 #endif 5515 { 5516 Mat_MPIAIJ *a; 5517 5518 PetscFunctionBegin; 5519 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5520 PetscValidPointer(lvec, 2); 5521 PetscValidPointer(colmap, 3); 5522 PetscValidPointer(multScatter, 4); 5523 a = (Mat_MPIAIJ*) A->data; 5524 if (lvec) *lvec = a->lvec; 5525 if (colmap) *colmap = a->colmap; 5526 if (multScatter) *multScatter = a->Mvctx; 5527 PetscFunctionReturn(0); 5528 } 5529 5530 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5531 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5532 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5533 5534 #undef __FUNCT__ 5535 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5536 /* 5537 Computes (B'*A')' since computing B*A directly is untenable 5538 5539 n p p 5540 ( ) ( ) ( ) 5541 m ( A ) * n ( B ) = m ( C ) 5542 ( ) ( ) ( ) 5543 5544 */ 5545 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5546 { 5547 PetscErrorCode ierr; 5548 Mat At,Bt,Ct; 5549 5550 PetscFunctionBegin; 5551 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5552 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5553 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5554 ierr = MatDestroy(&At);CHKERRQ(ierr); 5555 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5556 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5557 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5558 PetscFunctionReturn(0); 5559 } 5560 5561 #undef __FUNCT__ 5562 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5563 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5564 { 5565 PetscErrorCode ierr; 5566 PetscInt m=A->rmap->n,n=B->cmap->n; 5567 Mat Cmat; 5568 5569 PetscFunctionBegin; 5570 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5571 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5572 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5573 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5574 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5575 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5576 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5577 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5578 5579 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5580 5581 *C = Cmat; 5582 PetscFunctionReturn(0); 5583 } 5584 5585 /* ----------------------------------------------------------------*/ 5586 #undef __FUNCT__ 5587 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5588 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5589 { 5590 PetscErrorCode ierr; 5591 5592 PetscFunctionBegin; 5593 if (scall == MAT_INITIAL_MATRIX) { 5594 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5595 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5596 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5597 } 5598 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5599 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5600 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5601 PetscFunctionReturn(0); 5602 } 5603 5604 #if defined(PETSC_HAVE_MUMPS) 5605 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5606 #endif 5607 #if defined(PETSC_HAVE_PASTIX) 5608 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5609 #endif 5610 #if defined(PETSC_HAVE_SUPERLU_DIST) 5611 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5612 #endif 5613 #if defined(PETSC_HAVE_CLIQUE) 5614 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5615 #endif 5616 5617 /*MC 5618 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5619 5620 Options Database Keys: 5621 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5622 5623 Level: beginner 5624 5625 .seealso: MatCreateAIJ() 5626 M*/ 5627 5628 #undef __FUNCT__ 5629 #define __FUNCT__ "MatCreate_MPIAIJ" 5630 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5631 { 5632 Mat_MPIAIJ *b; 5633 PetscErrorCode ierr; 5634 PetscMPIInt size; 5635 5636 PetscFunctionBegin; 5637 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5638 5639 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5640 B->data = (void*)b; 5641 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5642 B->assembled = PETSC_FALSE; 5643 B->insertmode = NOT_SET_VALUES; 5644 b->size = size; 5645 5646 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5647 5648 /* build cache for off array entries formed */ 5649 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5650 5651 b->donotstash = PETSC_FALSE; 5652 b->colmap = 0; 5653 b->garray = 0; 5654 b->roworiented = PETSC_TRUE; 5655 5656 /* stuff used for matrix vector multiply */ 5657 b->lvec = NULL; 5658 b->Mvctx = NULL; 5659 5660 /* stuff for MatGetRow() */ 5661 b->rowindices = 0; 5662 b->rowvalues = 0; 5663 b->getrowactive = PETSC_FALSE; 5664 5665 /* flexible pointer used in CUSP/CUSPARSE classes */ 5666 b->spptr = NULL; 5667 5668 #if defined(PETSC_HAVE_MUMPS) 5669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5670 #endif 5671 #if defined(PETSC_HAVE_PASTIX) 5672 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5673 #endif 5674 #if defined(PETSC_HAVE_SUPERLU_DIST) 5675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5676 #endif 5677 #if defined(PETSC_HAVE_CLIQUE) 5678 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5679 #endif 5680 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5681 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5682 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5683 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5684 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5685 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5686 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5687 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5688 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5689 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5690 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5691 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5692 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5693 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5694 PetscFunctionReturn(0); 5695 } 5696 5697 #undef __FUNCT__ 5698 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5699 /*@ 5700 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5701 and "off-diagonal" part of the matrix in CSR format. 5702 5703 Collective on MPI_Comm 5704 5705 Input Parameters: 5706 + comm - MPI communicator 5707 . m - number of local rows (Cannot be PETSC_DECIDE) 5708 . n - This value should be the same as the local size used in creating the 5709 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5710 calculated if N is given) For square matrices n is almost always m. 5711 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5712 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5713 . i - row indices for "diagonal" portion of matrix 5714 . j - column indices 5715 . a - matrix values 5716 . oi - row indices for "off-diagonal" portion of matrix 5717 . oj - column indices 5718 - oa - matrix values 5719 5720 Output Parameter: 5721 . mat - the matrix 5722 5723 Level: advanced 5724 5725 Notes: 5726 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5727 must free the arrays once the matrix has been destroyed and not before. 5728 5729 The i and j indices are 0 based 5730 5731 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5732 5733 This sets local rows and cannot be used to set off-processor values. 5734 5735 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5736 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5737 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5738 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5739 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5740 communication if it is known that only local entries will be set. 5741 5742 .keywords: matrix, aij, compressed row, sparse, parallel 5743 5744 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5745 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5746 @*/ 5747 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5748 { 5749 PetscErrorCode ierr; 5750 Mat_MPIAIJ *maij; 5751 5752 PetscFunctionBegin; 5753 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5754 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5755 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5756 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5757 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5758 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5759 maij = (Mat_MPIAIJ*) (*mat)->data; 5760 5761 (*mat)->preallocated = PETSC_TRUE; 5762 5763 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5764 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5765 5766 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5767 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5768 5769 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5770 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5771 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5772 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5773 5774 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5775 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5776 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5777 PetscFunctionReturn(0); 5778 } 5779 5780 /* 5781 Special version for direct calls from Fortran 5782 */ 5783 #include <petsc-private/fortranimpl.h> 5784 5785 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5786 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5787 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5788 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5789 #endif 5790 5791 /* Change these macros so can be used in void function */ 5792 #undef CHKERRQ 5793 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5794 #undef SETERRQ2 5795 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5796 #undef SETERRQ3 5797 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5798 #undef SETERRQ 5799 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5800 5801 #undef __FUNCT__ 5802 #define __FUNCT__ "matsetvaluesmpiaij_" 5803 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5804 { 5805 Mat mat = *mmat; 5806 PetscInt m = *mm, n = *mn; 5807 InsertMode addv = *maddv; 5808 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5809 PetscScalar value; 5810 PetscErrorCode ierr; 5811 5812 MatCheckPreallocated(mat,1); 5813 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5814 5815 #if defined(PETSC_USE_DEBUG) 5816 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5817 #endif 5818 { 5819 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5820 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5821 PetscBool roworiented = aij->roworiented; 5822 5823 /* Some Variables required in the macro */ 5824 Mat A = aij->A; 5825 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5826 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5827 MatScalar *aa = a->a; 5828 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5829 Mat B = aij->B; 5830 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5831 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5832 MatScalar *ba = b->a; 5833 5834 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5835 PetscInt nonew = a->nonew; 5836 MatScalar *ap1,*ap2; 5837 5838 PetscFunctionBegin; 5839 for (i=0; i<m; i++) { 5840 if (im[i] < 0) continue; 5841 #if defined(PETSC_USE_DEBUG) 5842 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5843 #endif 5844 if (im[i] >= rstart && im[i] < rend) { 5845 row = im[i] - rstart; 5846 lastcol1 = -1; 5847 rp1 = aj + ai[row]; 5848 ap1 = aa + ai[row]; 5849 rmax1 = aimax[row]; 5850 nrow1 = ailen[row]; 5851 low1 = 0; 5852 high1 = nrow1; 5853 lastcol2 = -1; 5854 rp2 = bj + bi[row]; 5855 ap2 = ba + bi[row]; 5856 rmax2 = bimax[row]; 5857 nrow2 = bilen[row]; 5858 low2 = 0; 5859 high2 = nrow2; 5860 5861 for (j=0; j<n; j++) { 5862 if (roworiented) value = v[i*n+j]; 5863 else value = v[i+j*m]; 5864 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5865 if (in[j] >= cstart && in[j] < cend) { 5866 col = in[j] - cstart; 5867 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5868 } else if (in[j] < 0) continue; 5869 #if defined(PETSC_USE_DEBUG) 5870 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5871 #endif 5872 else { 5873 if (mat->was_assembled) { 5874 if (!aij->colmap) { 5875 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5876 } 5877 #if defined(PETSC_USE_CTABLE) 5878 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5879 col--; 5880 #else 5881 col = aij->colmap[in[j]] - 1; 5882 #endif 5883 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5884 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5885 col = in[j]; 5886 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5887 B = aij->B; 5888 b = (Mat_SeqAIJ*)B->data; 5889 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5890 rp2 = bj + bi[row]; 5891 ap2 = ba + bi[row]; 5892 rmax2 = bimax[row]; 5893 nrow2 = bilen[row]; 5894 low2 = 0; 5895 high2 = nrow2; 5896 bm = aij->B->rmap->n; 5897 ba = b->a; 5898 } 5899 } else col = in[j]; 5900 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5901 } 5902 } 5903 } else if (!aij->donotstash) { 5904 if (roworiented) { 5905 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5906 } else { 5907 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5908 } 5909 } 5910 } 5911 } 5912 PetscFunctionReturnVoid(); 5913 } 5914 5915