#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ
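/*
   Usage sketch for the MATAIJ man page above (not part of the original source; the sizes and
   nonzero counts m, n, d_nz, o_nz are hypothetical placeholders).  It shows the recommended
   pattern of calling both preallocation routines so the same code works on one or many processes:

     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATAIJ);                               (or MatSetFromOptions(A) with -mat_type aij)
     MatSeqAIJSetPreallocation(A,d_nz,NULL);             (used when the communicator has one process)
     MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);   (used when it has several)
     ... MatSetValues() ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/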
*aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 138 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 139 if (type == NORM_2) { 140 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 141 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 142 } 143 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 144 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 145 } 146 } else if (type == NORM_1) { 147 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 148 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 149 } 150 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 151 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 152 } 153 } else if (type == NORM_INFINITY) { 154 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 155 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 156 } 157 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 158 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 159 } 160 161 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 162 if (type == NORM_INFINITY) { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 164 } else { 165 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 166 } 167 ierr = PetscFree(work);CHKERRQ(ierr); 168 if (type == NORM_2) { 169 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "MatDistribute_MPIAIJ" 176 /* 177 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 178 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
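   A calling sketch (illustrative only; the variable names are hypothetical, the signature is the
   one declared below): gmat is a MATSEQAIJ matrix that is significant only on rank 0, m is the
   number of rows this rank should own, and the result is a parallel AIJ matrix on comm:

     MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&inmat);   first call builds the structure
     MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&inmat);     later calls move only the values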
179 180 Only for square matrices 181 182 Used by a preconditioner, hence PETSC_EXTERN 183 */ 184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 185 { 186 PetscMPIInt rank,size; 187 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 188 PetscErrorCode ierr; 189 Mat mat; 190 Mat_SeqAIJ *gmata; 191 PetscMPIInt tag; 192 MPI_Status status; 193 PetscBool aij; 194 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 195 196 PetscFunctionBegin; 197 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 199 if (!rank) { 200 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 201 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 202 } 203 if (reuse == MAT_INITIAL_MATRIX) { 204 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 205 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 206 if (!rank) { 207 bses[0] = gmat->rmap->bs; 208 bses[1] = gmat->cmap->bs; 209 } 210 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 211 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 212 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 213 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 214 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 215 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 216 217 rowners[0] = 0; 218 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 219 rstart = rowners[rank]; 220 rend = rowners[rank+1]; 221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 222 if (!rank) { 223 gmata = (Mat_SeqAIJ*) gmat->data; 224 /* send row lengths to all processors */ 225 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 226 for (i=1; i<size; i++) { 227 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 228 } 229 /* determine number diagonal and off-diagonal counts */ 230 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 231 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 232 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 233 jj = 0; 234 for (i=0; i<m; i++) { 235 for (j=0; j<dlens[i]; j++) { 236 if (gmata->j[jj] < rstart) ld[i]++; 237 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 238 jj++; 239 } 240 } 241 /* send column indices to other processes */ 242 for (i=1; i<size; i++) { 243 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 244 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 246 } 247 248 /* send numerical values to other processes */ 249 for (i=1; i<size; i++) { 250 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 251 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 252 } 253 gmataa = gmata->a; 254 gmataj = gmata->j; 255 256 } else { 257 /* receive row lengths */ 258 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 /* receive column indices */ 260 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 262 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 263 /* determine number diagonal and off-diagonal counts */ 264 ierr = 
PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 266 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 267 jj = 0; 268 for (i=0; i<m; i++) { 269 for (j=0; j<dlens[i]; j++) { 270 if (gmataj[jj] < rstart) ld[i]++; 271 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 272 jj++; 273 } 274 } 275 /* receive numerical values */ 276 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 277 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 278 } 279 /* set preallocation */ 280 for (i=0; i<m; i++) { 281 dlens[i] -= olens[i]; 282 } 283 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 284 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 285 286 for (i=0; i<m; i++) { 287 dlens[i] += olens[i]; 288 } 289 cnt = 0; 290 for (i=0; i<m; i++) { 291 row = rstart + i; 292 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 293 cnt += dlens[i]; 294 } 295 if (rank) { 296 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 297 } 298 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 299 ierr = PetscFree(rowners);CHKERRQ(ierr); 300 301 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 302 303 *inmat = mat; 304 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 307 mat = *inmat; 308 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 309 if (!rank) { 310 /* send numerical values to other processes */ 311 gmata = (Mat_SeqAIJ*) gmat->data; 312 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 313 gmataa = gmata->a; 314 for (i=1; i<size; i++) { 315 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 316 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 317 } 318 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 319 } else { 320 /* receive numerical values from process 0*/ 321 nz = Ad->nz + Ao->nz; 322 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 326 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 327 ad = Ad->a; 328 ao = Ao->a; 329 if (mat->rmap->n) { 330 i = 0; 331 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 for (i=1; i<mat->rmap->n; i++) { 335 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 336 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 337 } 338 i--; 339 if (mat->rmap->n) { 340 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 341 } 342 if (rank) { 343 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 344 } 345 } 346 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 /* 352 Local utility routine that creates a mapping from the global column 353 number to the 
local number in the off-diagonal part of the local 354 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 355 a slightly higher hash table cost; without it it is not scalable (each processor 356 has an order N integer array but is fast to acess. 357 */ 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 361 { 362 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 363 PetscErrorCode ierr; 364 PetscInt n = aij->B->cmap->n,i; 365 366 PetscFunctionBegin; 367 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 368 #if defined(PETSC_USE_CTABLE) 369 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 370 for (i=0; i<n; i++) { 371 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 372 } 373 #else 374 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 375 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 376 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 377 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 378 #endif 379 PetscFunctionReturn(0); 380 } 381 382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 383 { \ 384 if (col <= lastcol1) low1 = 0; \ 385 else high1 = nrow1; \ 386 lastcol1 = col;\ 387 while (high1-low1 > 5) { \ 388 t = (low1+high1)/2; \ 389 if (rp1[t] > col) high1 = t; \ 390 else low1 = t; \ 391 } \ 392 for (_i=low1; _i<high1; _i++) { \ 393 if (rp1[_i] > col) break; \ 394 if (rp1[_i] == col) { \ 395 if (addv == ADD_VALUES) ap1[_i] += value; \ 396 else ap1[_i] = value; \ 397 goto a_noinsert; \ 398 } \ 399 } \ 400 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 401 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 402 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 403 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 404 N = nrow1++ - 1; a->nz++; high1++; \ 405 /* shift up all the later entries in this row */ \ 406 for (ii=N; ii>=_i; ii--) { \ 407 rp1[ii+1] = rp1[ii]; \ 408 ap1[ii+1] = ap1[ii]; \ 409 } \ 410 rp1[_i] = col; \ 411 ap1[_i] = value; \ 412 a_noinsert: ; \ 413 ailen[row] = nrow1; \ 414 } 415 416 417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 418 { \ 419 if (col <= lastcol2) low2 = 0; \ 420 else high2 = nrow2; \ 421 lastcol2 = col; \ 422 while (high2-low2 > 5) { \ 423 t = (low2+high2)/2; \ 424 if (rp2[t] > col) high2 = t; \ 425 else low2 = t; \ 426 } \ 427 for (_i=low2; _i<high2; _i++) { \ 428 if (rp2[_i] > col) break; \ 429 if (rp2[_i] == col) { \ 430 if (addv == ADD_VALUES) ap2[_i] += value; \ 431 else ap2[_i] = value; \ 432 goto b_noinsert; \ 433 } \ 434 } \ 435 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 436 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 437 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 438 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 439 N = nrow2++ - 1; b->nz++; high2++; \ 440 /* shift up all the later entries in this row */ \ 441 for (ii=N; ii>=_i; ii--) { \ 442 rp2[ii+1] = rp2[ii]; \ 443 ap2[ii+1] = ap2[ii]; \ 444 } \ 445 rp2[_i] = col; \ 446 ap2[_i] = value; \ 447 
b_noinsert: ; \ 448 bilen[row] = nrow2; \ 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 454 { 455 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 456 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 457 PetscErrorCode ierr; 458 PetscInt l,*garray = mat->garray,diag; 459 460 PetscFunctionBegin; 461 /* code only works for square matrices A */ 462 463 /* find size of row to the left of the diagonal part */ 464 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 465 row = row - diag; 466 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 467 if (garray[b->j[b->i[row]+l]] > diag) break; 468 } 469 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 470 471 /* diagonal part */ 472 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 473 474 /* right of diagonal part */ 475 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatSetValues_MPIAIJ" 481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 PetscScalar value; 485 PetscErrorCode ierr; 486 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 487 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 488 PetscBool roworiented = aij->roworiented; 489 490 /* Some Variables required in the macro */ 491 Mat A = aij->A; 492 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 493 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 494 MatScalar *aa = a->a; 495 PetscBool ignorezeroentries = a->ignorezeroentries; 496 Mat B = aij->B; 497 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 498 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 499 MatScalar *ba = b->a; 500 501 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 502 PetscInt nonew; 503 MatScalar *ap1,*ap2; 504 505 PetscFunctionBegin; 506 if (v) PetscValidScalarPointer(v,6); 507 for (i=0; i<m; i++) { 508 if (im[i] < 0) continue; 509 #if defined(PETSC_USE_DEBUG) 510 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 511 #endif 512 if (im[i] >= rstart && im[i] < rend) { 513 row = im[i] - rstart; 514 lastcol1 = -1; 515 rp1 = aj + ai[row]; 516 ap1 = aa + ai[row]; 517 rmax1 = aimax[row]; 518 nrow1 = ailen[row]; 519 low1 = 0; 520 high1 = nrow1; 521 lastcol2 = -1; 522 rp2 = bj + bi[row]; 523 ap2 = ba + bi[row]; 524 rmax2 = bimax[row]; 525 nrow2 = bilen[row]; 526 low2 = 0; 527 high2 = nrow2; 528 529 for (j=0; j<n; j++) { 530 if (v) { 531 if (roworiented) value = v[i*n+j]; 532 else value = v[i+j*m]; 533 } else value = 0.0; 534 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 535 if (in[j] >= cstart && in[j] < cend) { 536 col = in[j] - cstart; 537 nonew = a->nonew; 538 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 539 } else if (in[j] < 0) continue; 540 #if defined(PETSC_USE_DEBUG) 541 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 542 #endif 543 else { 544 if (mat->was_assembled) { 545 if (!aij->colmap) { 
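          /* colmap (global column number -> local off-diagonal column, see
             MatCreateColmap_MPIAIJ_Private()) is built lazily the first time an
             off-diagonal entry must be located after the matrix has been assembled */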
546 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 547 } 548 #if defined(PETSC_USE_CTABLE) 549 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 550 col--; 551 #else 552 col = aij->colmap[in[j]] - 1; 553 #endif 554 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 555 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 556 col = in[j]; 557 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 558 B = aij->B; 559 b = (Mat_SeqAIJ*)B->data; 560 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 561 rp2 = bj + bi[row]; 562 ap2 = ba + bi[row]; 563 rmax2 = bimax[row]; 564 nrow2 = bilen[row]; 565 low2 = 0; 566 high2 = nrow2; 567 bm = aij->B->rmap->n; 568 ba = b->a; 569 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 570 } else col = in[j]; 571 nonew = b->nonew; 572 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 573 } 574 } 575 } else { 576 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 577 if (!aij->donotstash) { 578 mat->assembled = PETSC_FALSE; 579 if (roworiented) { 580 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 581 } else { 582 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 583 } 584 } 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatGetValues_MPIAIJ" 592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 593 { 594 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 595 PetscErrorCode ierr; 596 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 597 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 598 599 PetscFunctionBegin; 600 for (i=0; i<m; i++) { 601 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 602 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 603 if (idxm[i] >= rstart && idxm[i] < rend) { 604 row = idxm[i] - rstart; 605 for (j=0; j<n; j++) { 606 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 607 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 608 if (idxn[j] >= cstart && idxn[j] < cend) { 609 col = idxn[j] - cstart; 610 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 611 } else { 612 if (!aij->colmap) { 613 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 614 } 615 #if defined(PETSC_USE_CTABLE) 616 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 617 col--; 618 #else 619 col = aij->colmap[idxn[j]] - 1; 620 #endif 621 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 622 else { 623 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 624 } 625 } 626 } 627 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 628 } 629 PetscFunctionReturn(0); 630 } 631 632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 633 634 #undef __FUNCT__ 635 #define __FUNCT__ 
"MatAssemblyBegin_MPIAIJ" 636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt nstash,reallocs; 641 InsertMode addv; 642 643 PetscFunctionBegin; 644 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 645 646 /* make sure all processors are either in INSERTMODE or ADDMODE */ 647 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 648 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 649 mat->insertmode = addv; /* in case this processor had no cache */ 650 651 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 652 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 653 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 660 { 661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 663 PetscErrorCode ierr; 664 PetscMPIInt n; 665 PetscInt i,j,rstart,ncols,flg; 666 PetscInt *row,*col; 667 PetscBool other_disassembled; 668 PetscScalar *val; 669 InsertMode addv = mat->insertmode; 670 671 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 672 673 PetscFunctionBegin; 674 if (!aij->donotstash && !mat->nooffprocentries) { 675 while (1) { 676 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 677 if (!flg) break; 678 679 for (i=0; i<n; ) { 680 /* Now identify the consecutive vals belonging to the same row */ 681 for (j=i,rstart=row[j]; j<n; j++) { 682 if (row[j] != rstart) break; 683 } 684 if (j < n) ncols = j-i; 685 else ncols = n-i; 686 /* Now assemble all these values with a single function call */ 687 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 688 689 i = j; 690 } 691 } 692 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 693 } 694 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 695 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 696 697 /* determine if any processor has disassembled, if so we must 698 also disassemble ourselfs, in order that we may reassemble. 
*/ 699 /* 700 if nonzero structure of submatrix B cannot change then we know that 701 no processor disassembled thus we can skip this stuff 702 */ 703 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 704 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 705 if (mat->was_assembled && !other_disassembled) { 706 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 } 709 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 710 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 711 } 712 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 713 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 714 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 715 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 716 717 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 718 719 aij->rowvalues = 0; 720 721 /* used by MatAXPY() */ 722 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 723 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 724 725 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 726 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 733 { 734 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 739 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatZeroRows_MPIAIJ" 745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 746 { 747 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 748 PetscErrorCode ierr; 749 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 750 PetscInt i,*owners = A->rmap->range; 751 PetscInt *nprocs,j,idx,nsends,row; 752 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 753 PetscInt *rvalues,count,base,slen,*source; 754 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 755 MPI_Comm comm; 756 MPI_Request *send_waits,*recv_waits; 757 MPI_Status recv_status,*send_status; 758 const PetscScalar *xx; 759 PetscScalar *bb; 760 #if defined(PETSC_DEBUG) 761 PetscBool found = PETSC_FALSE; 762 #endif 763 764 PetscFunctionBegin; 765 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 766 /* first count number of contributors to each processor */ 767 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 768 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 769 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 770 j = 0; 771 for (i=0; i<N; i++) { 772 if (lastidx > (idx = rows[i])) j = 0; 773 lastidx = idx; 774 for (; j<size; j++) { 775 if (idx >= owners[j] && idx < owners[j+1]) { 776 nprocs[2*j]++; 777 nprocs[2*j+1] = 1; 778 owner[i] = j; 779 #if defined(PETSC_DEBUG) 780 found = PETSC_TRUE; 781 #endif 782 break; 783 } 784 } 785 #if defined(PETSC_DEBUG) 786 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 787 found = PETSC_FALSE; 788 #endif 789 } 790 nsends = 0; 791 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 792 793 if (A->nooffproczerorows) { 794 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off 
process zero row"); 795 nrecvs = nsends; 796 nmax = N; 797 } else { 798 /* inform other processors of number of messages and max length*/ 799 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 800 } 801 802 /* post receives: */ 803 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 804 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 805 for (i=0; i<nrecvs; i++) { 806 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 807 } 808 809 /* do sends: 810 1) starts[i] gives the starting index in svalues for stuff going to 811 the ith processor 812 */ 813 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 814 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 815 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 816 817 starts[0] = 0; 818 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 819 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 820 821 starts[0] = 0; 822 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 823 count = 0; 824 for (i=0; i<size; i++) { 825 if (nprocs[2*i+1]) { 826 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 827 } 828 } 829 ierr = PetscFree(starts);CHKERRQ(ierr); 830 831 base = owners[rank]; 832 833 /* wait on receives */ 834 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 835 count = nrecvs; slen = 0; 836 while (count) { 837 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 838 /* unpack receives into our local space */ 839 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 840 841 source[imdex] = recv_status.MPI_SOURCE; 842 lens[imdex] = n; 843 slen += n; 844 count--; 845 } 846 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 847 848 /* move the data into the send scatter */ 849 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 850 count = 0; 851 for (i=0; i<nrecvs; i++) { 852 values = rvalues + i*nmax; 853 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 854 } 855 ierr = PetscFree(rvalues);CHKERRQ(ierr); 856 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 857 ierr = PetscFree(owner);CHKERRQ(ierr); 858 ierr = PetscFree(nprocs);CHKERRQ(ierr); 859 860 /* fix right hand side if needed */ 861 if (x && b) { 862 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 863 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 864 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 865 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 866 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 867 } 868 /* 869 Zero the required rows. If the "diagonal block" of the matrix 870 is square and the user wishes to set the diagonal we use separate 871 code so that MatSetValues() is not called for each diagonal allocating 872 new memory, thus calling lots of mallocs and slowing things down. 
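       In summary: when diag is nonzero and the diagonal block is square, MatZeroRows() on l->A
       places diag directly; when diag is nonzero but the block is rectangular, the rows are zeroed
       first and the diagonal entries are then inserted one at a time with MatSetValues(); when diag
       is zero the rows are simply zeroed.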
873 874 */ 875 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 876 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 877 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 878 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 879 } else if (diag != 0.0) { 880 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 881 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 882 for (i = 0; i < slen; i++) { 883 row = lrows[i] + rstart; 884 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 885 } 886 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 887 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 } else { 889 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 890 } 891 ierr = PetscFree(lrows);CHKERRQ(ierr); 892 893 /* wait on sends */ 894 if (nsends) { 895 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 896 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 897 ierr = PetscFree(send_status);CHKERRQ(ierr); 898 } 899 ierr = PetscFree(send_waits);CHKERRQ(ierr); 900 ierr = PetscFree(svalues);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 907 { 908 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 909 PetscErrorCode ierr; 910 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 911 PetscInt i,*owners = A->rmap->range; 912 PetscInt *nprocs,j,idx,nsends; 913 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 914 PetscInt *rvalues,count,base,slen,*source; 915 PetscInt *lens,*lrows,*values,m; 916 MPI_Comm comm; 917 MPI_Request *send_waits,*recv_waits; 918 MPI_Status recv_status,*send_status; 919 const PetscScalar *xx; 920 PetscScalar *bb,*mask; 921 Vec xmask,lmask; 922 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 923 const PetscInt *aj, *ii,*ridx; 924 PetscScalar *aa; 925 #if defined(PETSC_DEBUG) 926 PetscBool found = PETSC_FALSE; 927 #endif 928 929 PetscFunctionBegin; 930 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 931 /* first count number of contributors to each processor */ 932 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 933 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 934 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 935 j = 0; 936 for (i=0; i<N; i++) { 937 if (lastidx > (idx = rows[i])) j = 0; 938 lastidx = idx; 939 for (; j<size; j++) { 940 if (idx >= owners[j] && idx < owners[j+1]) { 941 nprocs[2*j]++; 942 nprocs[2*j+1] = 1; 943 owner[i] = j; 944 #if defined(PETSC_DEBUG) 945 found = PETSC_TRUE; 946 #endif 947 break; 948 } 949 } 950 #if defined(PETSC_DEBUG) 951 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 952 found = PETSC_FALSE; 953 #endif 954 } 955 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 956 957 /* inform other processors of number of messages and max length*/ 958 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 959 960 /* post receives: */ 961 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 962 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 963 for (i=0; i<nrecvs; i++) { 964 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 965 } 966 967 /* do sends: 968 1) starts[i] gives the starting index in svalues for stuff going to 969 the ith processor 970 */ 971 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 972 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 973 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 974 975 starts[0] = 0; 976 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 977 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 978 979 starts[0] = 0; 980 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 981 count = 0; 982 for (i=0; i<size; i++) { 983 if (nprocs[2*i+1]) { 984 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 985 } 986 } 987 ierr = PetscFree(starts);CHKERRQ(ierr); 988 989 base = owners[rank]; 990 991 /* wait on receives */ 992 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 993 count = nrecvs; slen = 0; 994 while (count) { 995 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 996 /* unpack receives into our local space */ 997 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 998 999 source[imdex] = recv_status.MPI_SOURCE; 1000 lens[imdex] = n; 1001 slen += n; 1002 count--; 1003 } 1004 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1005 1006 /* move the data into the send scatter */ 1007 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1008 count = 0; 1009 for (i=0; i<nrecvs; i++) { 1010 values = rvalues + i*nmax; 1011 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1012 } 1013 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1014 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1015 ierr = PetscFree(owner);CHKERRQ(ierr); 1016 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1017 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1018 1019 /* zero diagonal part of matrix */ 1020 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1021 1022 /* handle off diagonal part of matrix */ 1023 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1024 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1025 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1026 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1027 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1028 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1031 if (x) { 1032 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1035 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1036 } 1037 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1038 1039 /* remove zeroed rows of off diagonal matrix */ 1040 ii = aij->i; 1041 for (i=0; i<slen; i++) { 1042 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1043 } 1044 1045 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1046 if (aij->compressedrow.use) { 1047 m = aij->compressedrow.nrows; 1048 ii = 
aij->compressedrow.i; 1049 ridx = aij->compressedrow.rindex; 1050 for (i=0; i<m; i++) { 1051 n = ii[i+1] - ii[i]; 1052 aj = aij->j + ii[i]; 1053 aa = aij->a + ii[i]; 1054 1055 for (j=0; j<n; j++) { 1056 if (PetscAbsScalar(mask[*aj])) { 1057 if (b) bb[*ridx] -= *aa*xx[*aj]; 1058 *aa = 0.0; 1059 } 1060 aa++; 1061 aj++; 1062 } 1063 ridx++; 1064 } 1065 } else { /* do not use compressed row format */ 1066 m = l->B->rmap->n; 1067 for (i=0; i<m; i++) { 1068 n = ii[i+1] - ii[i]; 1069 aj = aij->j + ii[i]; 1070 aa = aij->a + ii[i]; 1071 for (j=0; j<n; j++) { 1072 if (PetscAbsScalar(mask[*aj])) { 1073 if (b) bb[i] -= *aa*xx[*aj]; 1074 *aa = 0.0; 1075 } 1076 aa++; 1077 aj++; 1078 } 1079 } 1080 } 1081 if (x) { 1082 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1083 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1084 } 1085 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1086 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1087 ierr = PetscFree(lrows);CHKERRQ(ierr); 1088 1089 /* wait on sends */ 1090 if (nsends) { 1091 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1092 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1093 ierr = PetscFree(send_status);CHKERRQ(ierr); 1094 } 1095 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1096 ierr = PetscFree(svalues);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMult_MPIAIJ" 1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1103 { 1104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 PetscInt nt; 1107 1108 PetscFunctionBegin; 1109 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1110 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 PetscBool merged; 1152 1153 PetscFunctionBegin; 1154 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1155 /* do nondiagonal part */ 1156 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1157 if (!merged) { 1158 /* send 
it on its way */ 1159 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1160 /* do local part */ 1161 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1162 /* receive remote parts: note this assumes the values are not actually */ 1163 /* added in yy until the next line, */ 1164 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 } else { 1166 /* do local part */ 1167 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1168 /* send it on its way */ 1169 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1170 /* values actually were received in the Begin() but we need to call this nop */ 1171 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1178 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1179 { 1180 MPI_Comm comm; 1181 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1182 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1183 IS Me,Notme; 1184 PetscErrorCode ierr; 1185 PetscInt M,N,first,last,*notme,i; 1186 PetscMPIInt size; 1187 1188 PetscFunctionBegin; 1189 /* Easy test: symmetric diagonal block */ 1190 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1191 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1192 if (!*f) PetscFunctionReturn(0); 1193 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1194 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1195 if (size == 1) PetscFunctionReturn(0); 1196 1197 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1198 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1199 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1200 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1201 for (i=0; i<first; i++) notme[i] = i; 1202 for (i=last; i<M; i++) notme[i-last+first] = i; 1203 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1204 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1205 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1206 Aoff = Aoffs[0]; 1207 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1208 Boff = Boffs[0]; 1209 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1210 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1211 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1212 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1213 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1214 ierr = PetscFree(notme);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 #undef __FUNCT__ 1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1221 { 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1223 PetscErrorCode ierr; 1224 1225 PetscFunctionBegin; 1226 /* do nondiagonal part */ 1227 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1228 /* send it on its way */ 1229 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1230 /* do local part */ 1231 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1232 /* receive remote parts */ 1233 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 /* 1238 This only works correctly for 
square matrices where the subblock A->A is the 1239 diagonal block 1240 */ 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1244 { 1245 PetscErrorCode ierr; 1246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1247 1248 PetscFunctionBegin; 1249 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1250 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1251 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatScale_MPIAIJ" 1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1258 { 1259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1260 PetscErrorCode ierr; 1261 1262 PetscFunctionBegin; 1263 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1264 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatDestroy_MPIAIJ" 1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1271 { 1272 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 #if defined(PETSC_USE_LOG) 1277 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1278 #endif 1279 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1280 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1281 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1282 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1283 #if defined(PETSC_USE_CTABLE) 1284 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1285 #else 1286 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1287 #endif 1288 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1289 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1290 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1291 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1292 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1293 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1294 1295 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1310 { 1311 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1312 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1313 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1314 PetscErrorCode ierr; 1315 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1316 int fd; 1317 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1318 PetscInt nzmax,*column_indices,j,k,col,*garray = 
aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1319 PetscScalar *column_values; 1320 PetscInt message_count,flowcontrolcount; 1321 FILE *file; 1322 1323 PetscFunctionBegin; 1324 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1325 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1326 nz = A->nz + B->nz; 1327 if (!rank) { 1328 header[0] = MAT_FILE_CLASSID; 1329 header[1] = mat->rmap->N; 1330 header[2] = mat->cmap->N; 1331 1332 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1333 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1334 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 /* get largest number of rows any processor has */ 1336 rlen = mat->rmap->n; 1337 range = mat->rmap->range; 1338 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1339 } else { 1340 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1341 rlen = mat->rmap->n; 1342 } 1343 1344 /* load up the local row counts */ 1345 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1346 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1347 1348 /* store the row lengths to the file */ 1349 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1350 if (!rank) { 1351 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1352 for (i=1; i<size; i++) { 1353 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1354 rlen = range[i+1] - range[i]; 1355 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1356 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1357 } 1358 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1359 } else { 1360 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1361 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1362 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1363 } 1364 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1365 1366 /* load up the local column indices */ 1367 nzmax = nz; /* th processor needs space a largest processor needs */ 1368 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1369 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1370 cnt = 0; 1371 for (i=0; i<mat->rmap->n; i++) { 1372 for (j=B->i[i]; j<B->i[i+1]; j++) { 1373 if ((col = garray[B->j[j]]) > cstart) break; 1374 column_indices[cnt++] = col; 1375 } 1376 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1377 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1378 } 1379 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1380 1381 /* store the column indices to the file */ 1382 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1383 if (!rank) { 1384 MPI_Status status; 1385 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1386 for (i=1; i<size; i++) { 1387 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 
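      /* rank 0 paces process i via the flow control step above, receives that process's nonzero
         count and column indices, and writes them to the binary file on its behalf */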
1388 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1389 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1390 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1391 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1392 } 1393 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1394 } else { 1395 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1396 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1397 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1398 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1399 } 1400 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1401 1402 /* load up the local column values */ 1403 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1404 cnt = 0; 1405 for (i=0; i<mat->rmap->n; i++) { 1406 for (j=B->i[i]; j<B->i[i+1]; j++) { 1407 if (garray[B->j[j]] > cstart) break; 1408 column_values[cnt++] = B->a[j]; 1409 } 1410 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1411 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1412 } 1413 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1414 1415 /* store the column values to the file */ 1416 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1417 if (!rank) { 1418 MPI_Status status; 1419 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1420 for (i=1; i<size; i++) { 1421 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1422 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1423 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1424 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1425 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1426 } 1427 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1428 } else { 1429 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1430 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1431 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1432 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1433 } 1434 ierr = PetscFree(column_values);CHKERRQ(ierr); 1435 1436 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1437 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #include <petscdraw.h> 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1445 { 1446 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1447 PetscErrorCode ierr; 1448 PetscMPIInt rank = aij->rank,size = aij->size; 1449 PetscBool isdraw,iascii,isbinary; 1450 PetscViewer sviewer; 1451 PetscViewerFormat format; 1452 1453 PetscFunctionBegin; 1454 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1457 if (iascii) { 1458 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1459 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1460 MatInfo info; 1461 PetscBool inodes; 1462 1463 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1464 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1465 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1467 if (!inodes) { 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1469 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1472 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1473 } 1474 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1475 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1476 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1478 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1481 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1484 PetscInt inodecount,inodelimit,*inodes; 1485 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1486 if (inodes) { 1487 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1488 } else { 1489 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1490 } 1491 PetscFunctionReturn(0); 1492 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1493 PetscFunctionReturn(0); 1494 } 1495 } else if (isbinary) { 1496 if (size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 } else { 1500 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } else if (isdraw) { 1504 PetscDraw draw; 1505 PetscBool isnull; 1506 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1507 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1508 } 1509 1510 if (size == 1) { 1511 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1512 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1513 } else { 1514 /* assemble the entire matrix onto first processor. 
*/ 1515 Mat A; 1516 Mat_SeqAIJ *Aloc; 1517 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1518 MatScalar *a; 1519 1520 if (mat->rmap->N > 1024) { 1521 PetscBool flg = PETSC_FALSE; 1522 1523 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1524 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1525 } 1526 1527 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1528 if (!rank) { 1529 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1530 } else { 1531 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1532 } 1533 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1534 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1535 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1536 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1537 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1538 1539 /* copy over the A part */ 1540 Aloc = (Mat_SeqAIJ*)aij->A->data; 1541 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 row = mat->rmap->rstart; 1543 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1544 for (i=0; i<m; i++) { 1545 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1546 row++; 1547 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1548 } 1549 aj = Aloc->j; 1550 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1551 1552 /* copy over the B part */ 1553 Aloc = (Mat_SeqAIJ*)aij->B->data; 1554 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1555 row = mat->rmap->rstart; 1556 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1557 ct = cols; 1558 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1559 for (i=0; i<m; i++) { 1560 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1561 row++; 1562 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1563 } 1564 ierr = PetscFree(ct);CHKERRQ(ierr); 1565 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 /* 1568 Everyone has to call to draw the matrix since the graphics waits are 1569 synchronized across all processors that share the PetscDraw object 1570 */ 1571 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1572 if (!rank) { 1573 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1574 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1575 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1576 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1577 } 1578 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1579 ierr = MatDestroy(&A);CHKERRQ(ierr); 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatView_MPIAIJ" 1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1587 { 1588 PetscErrorCode ierr; 1589 PetscBool iascii,isdraw,issocket,isbinary; 1590 1591 PetscFunctionBegin; 1592 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1593 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1596 if (iascii || isdraw || isbinary || issocket) { 1597 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatSOR_MPIAIJ" 1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1605 { 1606 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1607 PetscErrorCode ierr; 1608 Vec bb1 = 0; 1609 PetscBool hasop; 1610 1611 PetscFunctionBegin; 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1618 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1619 } 1620 1621 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1622 if (flag & SOR_ZERO_INITIAL_GUESS) { 1623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1624 its--; 1625 } 1626 1627 while (its--) { 1628 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 1631 /* update rhs: bb1 = bb - B*x */ 1632 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1633 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1634 1635 /* local sweep */ 1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1637 } 1638 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1639 if (flag & SOR_ZERO_INITIAL_GUESS) { 1640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1641 its--; 1642 } 1643 while (its--) { 1644 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 1647 /* update rhs: bb1 = bb - B*x */ 1648 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1649 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1650 1651 /* local sweep */ 1652 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1653 } 1654 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1655 if (flag & SOR_ZERO_INITIAL_GUESS) { 1656 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1657 its--; 1658 } 1659 while (its--) { 1660 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 1663 /* update rhs: bb1 = bb - B*x */ 1664 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1665 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1666 1667 /* local sweep */ 1668 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1669 } 1670 } else if (flag & SOR_EISENSTAT) { 1671 Vec xx1; 1672 1673 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1675 
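    /*
       Eisenstat's trick, as carried out by the steps below: the backward sweep above
       produced xx; the off-process part of xx is gathered into mat->lvec, and a
       modified right-hand side is assembled as
           bb1 = bb + ((omega-2)/omega) * D * xx + B * lvec
       where D is the diagonal (applied with MatMultDiagonalBlock() when that operation
       is available, otherwise by a pointwise multiply with the stored diagonal vector)
       and B is the off-diagonal block.  A forward sweep with zero initial guess is then
       run on bb1, and its result xx1 is added to xx.
    */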
1676 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 if (!mat->diag) { 1679 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1680 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1681 } 1682 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1683 if (hasop) { 1684 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1685 } else { 1686 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1687 } 1688 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1689 1690 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1691 1692 /* local sweep */ 1693 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1694 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1695 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1696 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1697 1698 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatPermute_MPIAIJ" 1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1705 { 1706 Mat aA,aB,Aperm; 1707 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1708 PetscScalar *aa,*ba; 1709 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1710 PetscSF rowsf,sf; 1711 IS parcolp = NULL; 1712 PetscBool done; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1717 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1718 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1719 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1720 1721 /* Invert row permutation to find out where my rows should go */ 1722 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1723 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1724 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1725 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1726 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1727 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1728 1729 /* Invert column permutation to find out where my columns should go */ 1730 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1731 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1732 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1733 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1734 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1737 1738 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1739 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1740 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1741 1742 /* Find out where my gcols should go */ 1743 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1744 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1745 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1746 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1747 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1748 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1751 1752 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1753 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1757 for (i=0; i<m; i++) { 1758 PetscInt row = rdest[i],rowner; 1759 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1760 for (j=ai[i]; j<ai[i+1]; j++) { 1761 PetscInt cowner,col = cdest[aj[j]]; 1762 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1763 if (rowner == cowner) dnnz[i]++; 1764 else onnz[i]++; 1765 } 1766 for (j=bi[i]; j<bi[i+1]; j++) { 1767 PetscInt cowner,col = gcdest[bj[j]]; 1768 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1769 if (rowner == cowner) dnnz[i]++; 1770 else onnz[i]++; 1771 } 1772 } 1773 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1780 1781 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1784 for (i=0; i<m; i++) { 1785 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1786 PetscInt rowlen; 1787 rowlen = ai[i+1] - ai[i]; 1788 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1789 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1790 rowlen = bi[i+1] - bi[i]; 1791 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1792 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1793 } 1794 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1800 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1801 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1802 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1803 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1804 *B = Aperm; 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1811 { 1812 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1813 Mat A = mat->A,B = mat->B; 1814 PetscErrorCode ierr; 1815 PetscReal isend[5],irecv[5]; 1816 1817 PetscFunctionBegin; 1818 
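  /*
     The local information is obtained by querying the diagonal block A and the
     off-diagonal block B separately and summing the five floating-point fields
     (nz_used, nz_allocated, nz_unneeded, memory, mallocs); for MAT_GLOBAL_MAX and
     MAT_GLOBAL_SUM those sums are then reduced across the matrix's communicator.
     Illustrative caller-side usage, not part of this file:
         MatInfo info;
         ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
         ierr = PetscPrintf(PETSC_COMM_WORLD,"total nonzeros used %g\n",info.nz_used);CHKERRQ(ierr);
  */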
info->block_size = 1.0; 1819 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1820 1821 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1822 isend[3] = info->memory; isend[4] = info->mallocs; 1823 1824 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1825 1826 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1827 isend[3] += info->memory; isend[4] += info->mallocs; 1828 if (flag == MAT_LOCAL) { 1829 info->nz_used = isend[0]; 1830 info->nz_allocated = isend[1]; 1831 info->nz_unneeded = isend[2]; 1832 info->memory = isend[3]; 1833 info->mallocs = isend[4]; 1834 } else if (flag == MAT_GLOBAL_MAX) { 1835 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1836 1837 info->nz_used = irecv[0]; 1838 info->nz_allocated = irecv[1]; 1839 info->nz_unneeded = irecv[2]; 1840 info->memory = irecv[3]; 1841 info->mallocs = irecv[4]; 1842 } else if (flag == MAT_GLOBAL_SUM) { 1843 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1844 1845 info->nz_used = irecv[0]; 1846 info->nz_allocated = irecv[1]; 1847 info->nz_unneeded = irecv[2]; 1848 info->memory = irecv[3]; 1849 info->mallocs = irecv[4]; 1850 } 1851 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1852 info->fill_ratio_needed = 0; 1853 info->factor_mallocs = 0; 1854 PetscFunctionReturn(0); 1855 } 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatSetOption_MPIAIJ" 1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1860 { 1861 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 switch (op) { 1866 case MAT_NEW_NONZERO_LOCATIONS: 1867 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1868 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1869 case MAT_KEEP_NONZERO_PATTERN: 1870 case MAT_NEW_NONZERO_LOCATION_ERR: 1871 case MAT_USE_INODES: 1872 case MAT_IGNORE_ZERO_ENTRIES: 1873 MatCheckPreallocated(A,1); 1874 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1875 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1876 break; 1877 case MAT_ROW_ORIENTED: 1878 a->roworiented = flg; 1879 1880 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1881 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1882 break; 1883 case MAT_NEW_DIAGONALS: 1884 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1885 break; 1886 case MAT_IGNORE_OFF_PROC_ENTRIES: 1887 a->donotstash = flg; 1888 break; 1889 case MAT_SPD: 1890 A->spd_set = PETSC_TRUE; 1891 A->spd = flg; 1892 if (flg) { 1893 A->symmetric = PETSC_TRUE; 1894 A->structurally_symmetric = PETSC_TRUE; 1895 A->symmetric_set = PETSC_TRUE; 1896 A->structurally_symmetric_set = PETSC_TRUE; 1897 } 1898 break; 1899 case MAT_SYMMETRIC: 1900 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1901 break; 1902 case MAT_STRUCTURALLY_SYMMETRIC: 1903 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1904 break; 1905 case MAT_HERMITIAN: 1906 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1907 break; 1908 case MAT_SYMMETRY_ETERNAL: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 default: 1912 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatGetRow_MPIAIJ" 1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1920 { 1921 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1922 PetscScalar 
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1923 PetscErrorCode ierr; 1924 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1925 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1926 PetscInt *cmap,*idx_p; 1927 1928 PetscFunctionBegin; 1929 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1930 mat->getrowactive = PETSC_TRUE; 1931 1932 if (!mat->rowvalues && (idx || v)) { 1933 /* 1934 allocate enough space to hold information from the longest row. 1935 */ 1936 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1937 PetscInt max = 1,tmp; 1938 for (i=0; i<matin->rmap->n; i++) { 1939 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1940 if (max < tmp) max = tmp; 1941 } 1942 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1943 } 1944 1945 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1946 lrow = row - rstart; 1947 1948 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1949 if (!v) {pvA = 0; pvB = 0;} 1950 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1951 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1952 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1953 nztot = nzA + nzB; 1954 1955 cmap = mat->garray; 1956 if (v || idx) { 1957 if (nztot) { 1958 /* Sort by increasing column numbers, assuming A and B already sorted */ 1959 PetscInt imark = -1; 1960 if (v) { 1961 *v = v_p = mat->rowvalues; 1962 for (i=0; i<nzB; i++) { 1963 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1964 else break; 1965 } 1966 imark = i; 1967 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1968 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1969 } 1970 if (idx) { 1971 *idx = idx_p = mat->rowindices; 1972 if (imark > -1) { 1973 for (i=0; i<imark; i++) { 1974 idx_p[i] = cmap[cworkB[i]]; 1975 } 1976 } else { 1977 for (i=0; i<nzB; i++) { 1978 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1979 else break; 1980 } 1981 imark = i; 1982 } 1983 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1984 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1985 } 1986 } else { 1987 if (idx) *idx = 0; 1988 if (v) *v = 0; 1989 } 1990 } 1991 *nz = nztot; 1992 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1993 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2000 { 2001 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2002 2003 PetscFunctionBegin; 2004 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2005 aij->getrowactive = PETSC_FALSE; 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatNorm_MPIAIJ" 2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2012 { 2013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2014 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2015 PetscErrorCode ierr; 2016 PetscInt i,j,cstart = mat->cmap->rstart; 2017 PetscReal sum = 0.0; 2018 MatScalar *v; 2019 2020 PetscFunctionBegin; 2021 if (aij->size == 1) { 2022 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2023 } else { 2024 if (type == NORM_FROBENIUS) { 2025 v = 
amat->a; 2026 for (i=0; i<amat->nz; i++) { 2027 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2028 } 2029 v = bmat->a; 2030 for (i=0; i<bmat->nz; i++) { 2031 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2032 } 2033 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2034 *norm = PetscSqrtReal(*norm); 2035 } else if (type == NORM_1) { /* max column norm */ 2036 PetscReal *tmp,*tmp2; 2037 PetscInt *jj,*garray = aij->garray; 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2040 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2041 *norm = 0.0; 2042 v = amat->a; jj = amat->j; 2043 for (j=0; j<amat->nz; j++) { 2044 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2045 } 2046 v = bmat->a; jj = bmat->j; 2047 for (j=0; j<bmat->nz; j++) { 2048 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2049 } 2050 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2051 for (j=0; j<mat->cmap->N; j++) { 2052 if (tmp2[j] > *norm) *norm = tmp2[j]; 2053 } 2054 ierr = PetscFree(tmp);CHKERRQ(ierr); 2055 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2056 } else if (type == NORM_INFINITY) { /* max row norm */ 2057 PetscReal ntemp = 0.0; 2058 for (j=0; j<aij->A->rmap->n; j++) { 2059 v = amat->a + amat->i[j]; 2060 sum = 0.0; 2061 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2062 sum += PetscAbsScalar(*v); v++; 2063 } 2064 v = bmat->a + bmat->i[j]; 2065 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2066 sum += PetscAbsScalar(*v); v++; 2067 } 2068 if (sum > ntemp) ntemp = sum; 2069 } 2070 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2071 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2072 } 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatTranspose_MPIAIJ" 2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2079 { 2080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2081 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2082 PetscErrorCode ierr; 2083 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2084 PetscInt cstart = A->cmap->rstart,ncol; 2085 Mat B; 2086 MatScalar *array; 2087 2088 PetscFunctionBegin; 2089 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2090 2091 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2092 ai = Aloc->i; aj = Aloc->j; 2093 bi = Bloc->i; bj = Bloc->j; 2094 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2095 PetscInt *d_nnz,*g_nnz,*o_nnz; 2096 PetscSFNode *oloc; 2097 PETSC_UNUSED PetscSF sf; 2098 2099 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2100 /* compute d_nnz for preallocation */ 2101 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2102 for (i=0; i<ai[ma]; i++) { 2103 d_nnz[aj[i]]++; 2104 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2105 } 2106 /* compute local off-diagonal contributions */ 2107 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2108 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2109 /* map those to global */ 2110 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2111 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2112 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2113 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2114 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2117 2118 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2119 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2120 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2121 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2122 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2123 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2124 } else { 2125 B = *matout; 2126 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2127 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2128 } 2129 2130 /* copy over the A part */ 2131 array = Aloc->a; 2132 row = A->rmap->rstart; 2133 for (i=0; i<ma; i++) { 2134 ncol = ai[i+1]-ai[i]; 2135 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2136 row++; 2137 array += ncol; aj += ncol; 2138 } 2139 aj = Aloc->j; 2140 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2141 2142 /* copy over the B part */ 2143 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2144 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2145 array = Bloc->a; 2146 row = A->rmap->rstart; 2147 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2148 cols_tmp = cols; 2149 for (i=0; i<mb; i++) { 2150 ncol = bi[i+1]-bi[i]; 2151 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2152 row++; 2153 array += ncol; cols_tmp += ncol; 2154 } 2155 ierr = PetscFree(cols);CHKERRQ(ierr); 2156 2157 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2160 *matout = B; 2161 } else { 2162 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2170 { 2171 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2172 Mat a = aij->A,b = aij->B; 2173 PetscErrorCode ierr; 2174 PetscInt s1,s2,s3; 2175 2176 PetscFunctionBegin; 2177 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2178 if (rr) { 2179 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2180 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2181 /* Overlap communication with computation. 
*/ 2182 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2183 } 2184 if (ll) { 2185 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2186 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2187 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2188 } 2189 /* scale the diagonal block */ 2190 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2191 2192 if (rr) { 2193 /* Do a scatter end and then right scale the off-diagonal block */ 2194 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2196 } 2197 PetscFunctionReturn(0); 2198 } 2199 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2203 { 2204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatEqual_MPIAIJ" 2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2215 { 2216 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2217 Mat a,b,c,d; 2218 PetscBool flg; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 a = matA->A; b = matA->B; 2223 c = matB->A; d = matB->B; 2224 2225 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2226 if (flg) { 2227 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2228 } 2229 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatCopy_MPIAIJ" 2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2236 { 2237 PetscErrorCode ierr; 2238 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2239 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2240 2241 PetscFunctionBegin; 2242 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2243 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2244 /* because of the column compression in the off-processor part of the matrix a->B, 2245 the number of columns in a->B and b->B may be different, hence we cannot call 2246 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2247 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2248 then copying the submatrices */ 2249 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2250 } else { 2251 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2252 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2253 } 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatSetUp_MPIAIJ" 2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2260 { 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2272 { 2273 PetscInt i,m=Y->rmap->N; 2274 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2275 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2276 const PetscInt *xi = x->i,*yi = y->i; 2277 2278 PetscFunctionBegin; 2279 /* Set the number of nonzeros in the new matrix */ 2280 for (i=0; i<m; i++) { 2281 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2282 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2283 nnz[i] = 0; 2284 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2285 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2286 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2287 nnz[i]++; 2288 } 2289 for (; k<nzy; k++) nnz[i]++; 2290 } 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatAXPY_MPIAIJ" 2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2297 { 2298 PetscErrorCode ierr; 2299 PetscInt i; 2300 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2301 PetscBLASInt bnz,one=1; 2302 Mat_SeqAIJ *x,*y; 2303 2304 PetscFunctionBegin; 2305 if (str == SAME_NONZERO_PATTERN) { 2306 PetscScalar alpha = a; 2307 x = (Mat_SeqAIJ*)xx->A->data; 2308 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2309 y = (Mat_SeqAIJ*)yy->A->data; 2310 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2311 x = (Mat_SeqAIJ*)xx->B->data; 2312 y = (Mat_SeqAIJ*)yy->B->data; 2313 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2314 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2315 } else if (str == SUBSET_NONZERO_PATTERN) { 2316 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2317 2318 x = (Mat_SeqAIJ*)xx->B->data; 2319 y = (Mat_SeqAIJ*)yy->B->data; 2320 if (y->xtoy && y->XtoY != xx->B) { 2321 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2322 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2323 } 2324 if (!y->xtoy) { /* get xtoy */ 2325 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2326 y->XtoY = xx->B; 2327 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2328 } 2329 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2330 } else { 2331 Mat B; 2332 PetscInt *nnz_d,*nnz_o; 2333 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2334 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2335 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2336 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2337 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2338 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2339 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2341 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2342 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2343 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2344 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2347 } 2348 PetscFunctionReturn(0); 2349 } 2350 2351 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatConjugate_MPIAIJ" 2355 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2356 { 2357 #if defined(PETSC_USE_COMPLEX) 2358 PetscErrorCode ierr; 2359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2360 2361 PetscFunctionBegin; 2362 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2363 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2364 #else 2365 PetscFunctionBegin; 2366 #endif 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatRealPart_MPIAIJ" 2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2373 { 2374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2375 PetscErrorCode ierr; 2376 2377 PetscFunctionBegin; 2378 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2379 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2386 { 2387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2392 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #if defined(PETSC_HAVE_PBGL) 2397 2398 #include <boost/parallel/mpi/bsp_process_group.hpp> 2399 #include <boost/graph/distributed/ilu_default_graph.hpp> 2400 #include <boost/graph/distributed/ilu_0_block.hpp> 2401 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2402 #include <boost/graph/distributed/petsc/interface.hpp> 2403 #include <boost/multi_array.hpp> 2404 #include <boost/parallel/distributed_property_map->hpp> 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2408 /* 2409 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2410 */ 2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2412 { 2413 namespace petsc = boost::distributed::petsc; 2414 2415 namespace graph_dist = boost::graph::distributed; 2416 using boost::graph::distributed::ilu_default::process_group_type; 2417 using boost::graph::ilu_permuted; 2418 2419 PetscBool row_identity, col_identity; 2420 PetscContainer c; 2421 PetscInt m, n, M, N; 2422 PetscErrorCode ierr; 2423 2424 PetscFunctionBegin; 2425 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2426 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2427 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2428 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2429 2430 process_group_type pg; 2431 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2432 lgraph_type 
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2433 lgraph_type& level_graph = *lgraph_p; 2434 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2435 2436 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2437 ilu_permuted(level_graph); 2438 2439 /* put together the new matrix */ 2440 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2441 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2442 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2443 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2444 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2445 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2446 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 2449 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2450 ierr = PetscContainerSetPointer(c, lgraph_p); 2451 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2452 ierr = PetscContainerDestroy(&c); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2459 { 2460 PetscFunctionBegin; 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatSolve_MPIAIJ" 2466 /* 2467 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2468 */ 2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2470 { 2471 namespace graph_dist = boost::graph::distributed; 2472 2473 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2474 lgraph_type *lgraph_p; 2475 PetscContainer c; 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2480 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2481 ierr = VecCopy(b, x);CHKERRQ(ierr); 2482 2483 PetscScalar *array_x; 2484 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2485 PetscInt sx; 2486 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2487 2488 PetscScalar *array_b; 2489 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2490 PetscInt sb; 2491 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2492 2493 lgraph_type& level_graph = *lgraph_p; 2494 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2495 2496 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2497 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2498 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2499 2500 typedef boost::iterator_property_map<array_ref_type::iterator, 2501 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2502 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2503 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2504 2505 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2506 PetscFunctionReturn(0); 2507 } 2508 #endif 2509 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "MatDestroy_MatRedundant" 2512 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2513 { 2514 PetscErrorCode ierr; 2515 Mat_Redundant *redund; 2516 PetscInt i; 2517 PetscMPIInt size; 2518 2519 PetscFunctionBegin; 2520 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2521 if (size == 1) { 2522 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2523 redund = 
a->redundant; 2524 } else { 2525 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2526 redund = a->redundant; 2527 } 2528 if (redund){ 2529 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2530 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2531 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2532 for (i=0; i<redund->nrecvs; i++) { 2533 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2534 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2535 } 2536 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2537 2538 if (redund->psubcomm) { 2539 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2540 } 2541 2542 ierr = redund->Destroy(A);CHKERRQ(ierr); 2543 ierr = PetscFree(redund);CHKERRQ(ierr); 2544 } 2545 PetscFunctionReturn(0); 2546 } 2547 2548 #undef __FUNCT__ 2549 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_psubcomm" 2550 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_psubcomm(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant) 2551 { 2552 PetscMPIInt rank,size; 2553 MPI_Comm comm,subcomm=psubcomm->comm; 2554 PetscErrorCode ierr; 2555 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2556 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2557 PetscInt *rowrange = mat->rmap->range; 2558 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2559 Mat A = aij->A,B=aij->B,C=*matredundant; 2560 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2561 PetscScalar *sbuf_a; 2562 PetscInt nzlocal=a->nz+b->nz; 2563 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2564 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2565 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2566 MatScalar *aworkA,*aworkB; 2567 PetscScalar *vals; 2568 PetscMPIInt tag1,tag2,tag3,imdex; 2569 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2570 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2571 MPI_Status recv_status,*send_status; 2572 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2573 PetscInt **rbuf_j=NULL; 2574 PetscScalar **rbuf_a=NULL; 2575 Mat_Redundant *redund =NULL; 2576 PetscInt rstart_sub,rend_sub,mloc_sub; 2577 2578 PetscFunctionBegin; 2579 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2580 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2581 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2582 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2583 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2584 2585 if (reuse == MAT_REUSE_MATRIX) { 2586 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2587 if (subsize == 1) { 2588 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2589 redund = c->redundant; 2590 } else { 2591 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2592 redund = c->redundant; 2593 } 2594 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2595 2596 nsends = redund->nsends; 2597 nrecvs = redund->nrecvs; 2598 send_rank = redund->send_rank; 2599 recv_rank = redund->recv_rank; 2600 sbuf_nz = redund->sbuf_nz; 2601 rbuf_nz = redund->rbuf_nz; 2602 sbuf_j = redund->sbuf_j; 2603 sbuf_a = redund->sbuf_a; 2604 rbuf_j = redund->rbuf_j; 2605 rbuf_a = redund->rbuf_a; 2606 } 2607 2608 if (reuse == MAT_INITIAL_MATRIX) { 2609 PetscInt nleftover,np_subcomm; 2610 2611 /* get local size of redundant matrix */ 2612 const PetscInt *range; 2613 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2614 rstart_sub = range[nsubcomm*subrank]; 2615 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2616 rend_sub = range[nsubcomm*(subrank+1)]; 2617 } else { 2618 rend_sub = mat->rmap->N; 2619 } 2620 mloc_sub = rend_sub - rstart_sub; 2621 2622 /* get the destination processors' id send_rank, nsends and nrecvs */ 2623 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2624 2625 np_subcomm = size/nsubcomm; 2626 nleftover = size - nsubcomm*np_subcomm; 2627 2628 nsends = 0; nrecvs = 0; 2629 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2630 /* -------------------------------------------*/ 2631 for (i=0; i<size; i++) { 2632 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2633 send_rank[nsends] = i; nsends++; 2634 recv_rank[nrecvs++] = i; 2635 /* printf("[%d] send to and recv from [%d]\n",rank,i); */ 2636 } 2637 } 2638 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2639 i = size-nleftover-1; 2640 j = 0; 2641 while (j < nsubcomm - nleftover) { 2642 send_rank[nsends++] = i; 2643 i--; j++; 2644 /* printf("[%d] send to [%d]\n",rank,i); */ 2645 } 2646 } 2647 2648 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2649 for (i=0; i<nleftover; i++) { 2650 recv_rank[nrecvs++] = size-nleftover+i; 2651 /* printf("[%d] recv from [%d]\n",rank,i); */ 2652 } 2653 } 2654 } else if (psubcomm->type == PETSC_SUBCOMM_CONTIGUOUS) { 2655 /* --------------------------------------------------*/ 2656 PetscInt color,subcommstart; 2657 subcommstart=0; 2658 for (color=0; color<nsubcomm; color++) { 2659 if (psubcomm->color != color) { 2660 for (i=0; i<psubcomm->subsize[color]; i++) { 2661 if (subrank == i) { /* my_subrank == other's subrank */ 2662 send_rank[nsends++] = subcommstart+i; 2663 recv_rank[nrecvs++] = subcommstart+i; 2664 /* printf("[%d] send to and recv from [%d]\n",rank,subcommstart+i); */ 2665 } 2666 } 2667 } 2668 subcommstart += psubcomm->subsize[color]; 2669 } 2670 if (nleftover && subrank == size/nsubcomm) { /* this proc is a leftover proc, send to subcomm that does not have leftover proc */ 2671 subcommstart=0; 2672 for (color=0; color<nsubcomm; color++) { 2673 subcommstart += psubcomm->subsize[color]; 2674 if (psubcomm->color == color) continue; 2675 if (psubcomm->subsize[color] == size/nsubcomm) { /* subcomm does not have leftover proc */ 2676 send_rank[nsends++] = subcommstart -1; /* send to the last proc of subcomm[color] */ 2677 /* printf("[%d] leftover send to [%d] \n",rank,subcommstart -1); */ 2678 } 2679 } 2680 } 2681 2682 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2683 subcommstart=0; 2684 for (color=0; color<nsubcomm; color++) { 2685 subcommstart += psubcomm->subsize[color]; 2686 if (psubcomm->subsize[color] > size/nsubcomm) { /* subcomm has leftover proc */ 2687 recv_rank[nrecvs++] = subcommstart -1; 
/* recv from the last proc of subcomm[color] */ 2688 /* printf("[%d] recv from [%d]\n",rank,subcommstart -1); */ 2689 } 2690 } 2691 } 2692 } /* endof (psubcomm->type == PETSC_SUBCOMM_INTERLACED) */ 2693 2694 /* allocate sbuf_j, sbuf_a */ 2695 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2696 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2697 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2698 /* 2699 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2700 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 2701 */ 2702 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2703 2704 /* copy mat's local entries into the buffers */ 2705 if (reuse == MAT_INITIAL_MATRIX) { 2706 rownz_max = 0; 2707 rptr = sbuf_j; 2708 cols = sbuf_j + rend-rstart + 1; 2709 vals = sbuf_a; 2710 rptr[0] = 0; 2711 for (i=0; i<rend-rstart; i++) { 2712 row = i + rstart; 2713 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2714 ncols = nzA + nzB; 2715 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2716 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2717 /* load the column indices for this row into cols */ 2718 lwrite = 0; 2719 for (l=0; l<nzB; l++) { 2720 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2721 vals[lwrite] = aworkB[l]; 2722 cols[lwrite++] = ctmp; 2723 } 2724 } 2725 for (l=0; l<nzA; l++) { 2726 vals[lwrite] = aworkA[l]; 2727 cols[lwrite++] = cstart + cworkA[l]; 2728 } 2729 for (l=0; l<nzB; l++) { 2730 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2731 vals[lwrite] = aworkB[l]; 2732 cols[lwrite++] = ctmp; 2733 } 2734 } 2735 vals += ncols; 2736 cols += ncols; 2737 rptr[i+1] = rptr[i] + ncols; 2738 if (rownz_max < ncols) rownz_max = ncols; 2739 } 2740 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2741 } else { /* only copy matrix values into sbuf_a */ 2742 rptr = sbuf_j; 2743 vals = sbuf_a; 2744 rptr[0] = 0; 2745 for (i=0; i<rend-rstart; i++) { 2746 row = i + rstart; 2747 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2748 ncols = nzA + nzB; 2749 cworkB = b->j + b->i[i]; 2750 aworkA = a->a + a->i[i]; 2751 aworkB = b->a + b->i[i]; 2752 lwrite = 0; 2753 for (l=0; l<nzB; l++) { 2754 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2755 } 2756 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2757 for (l=0; l<nzB; l++) { 2758 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2759 } 2760 vals += ncols; 2761 rptr[i+1] = rptr[i] + ncols; 2762 } 2763 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2764 2765 /* send nzlocal to others, and recv other's nzlocal */ 2766 /*--------------------------------------------------*/ 2767 if (reuse == MAT_INITIAL_MATRIX) { 2768 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2769 2770 s_waits2 = s_waits3 + nsends; 2771 s_waits1 = s_waits2 + nsends; 2772 r_waits1 = s_waits1 + nsends; 2773 r_waits2 = r_waits1 + nrecvs; 2774 r_waits3 = r_waits2 + nrecvs; 2775 } else { 2776 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2777 2778 r_waits3 = s_waits3 + nsends; 2779 } 2780 2781 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2782 if (reuse == MAT_INITIAL_MATRIX) { 2783 /* get new tags to keep the communication clean */ 2784 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2785 ierr = 
PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2786 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2787 2788 /* post receives of other's nzlocal */ 2789 for (i=0; i<nrecvs; i++) { 2790 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2791 } 2792 /* send nzlocal to others */ 2793 for (i=0; i<nsends; i++) { 2794 sbuf_nz[i] = nzlocal; 2795 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2796 } 2797 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2798 count = nrecvs; 2799 while (count) { 2800 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2801 2802 recv_rank[imdex] = recv_status.MPI_SOURCE; 2803 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2804 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2805 2806 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2807 2808 rbuf_nz[imdex] += i + 2; 2809 2810 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2811 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2812 count--; 2813 } 2814 /* wait on sends of nzlocal */ 2815 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2816 /* send mat->i,j to others, and recv from other's */ 2817 /*------------------------------------------------*/ 2818 for (i=0; i<nsends; i++) { 2819 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2820 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2821 } 2822 /* wait on receives of mat->i,j */ 2823 /*------------------------------*/ 2824 count = nrecvs; 2825 while (count) { 2826 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2827 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2828 count--; 2829 } 2830 /* wait on sends of mat->i,j */ 2831 /*---------------------------*/ 2832 if (nsends) { 2833 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2834 } 2835 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2836 2837 /* post receives, send and receive mat->a */ 2838 /*----------------------------------------*/ 2839 for (imdex=0; imdex<nrecvs; imdex++) { 2840 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2841 } 2842 for (i=0; i<nsends; i++) { 2843 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2844 } 2845 count = nrecvs; 2846 while (count) { 2847 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2848 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2849 count--; 2850 } 2851 if (nsends) { 2852 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2853 } 2854 2855 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2856 2857 /* create redundant matrix */ 2858 /*-------------------------*/ 2859 if (reuse == MAT_INITIAL_MATRIX) { 2860 /* compute rownz_max for preallocation */ 2861 for (imdex=0; imdex<nrecvs; imdex++) { 2862 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2863 rptr = 
rbuf_j[imdex]; 2864 for (i=0; i<j; i++) { 2865 ncols = rptr[i+1] - rptr[i]; 2866 if (rownz_max < ncols) rownz_max = ncols; 2867 } 2868 } 2869 2870 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2871 if (M == N) { 2872 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2873 } else { /* non-square matrix */ 2874 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2875 } 2876 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2877 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2878 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2879 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2880 } else { 2881 C = *matredundant; 2882 } 2883 2884 /* insert local matrix entries */ 2885 rptr = sbuf_j; 2886 cols = sbuf_j + rend-rstart + 1; 2887 vals = sbuf_a; 2888 for (i=0; i<rend-rstart; i++) { 2889 row = i + rstart; 2890 ncols = rptr[i+1] - rptr[i]; 2891 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2892 vals += ncols; 2893 cols += ncols; 2894 } 2895 /* insert received matrix entries */ 2896 for (imdex=0; imdex<nrecvs; imdex++) { 2897 rstart = rowrange[recv_rank[imdex]]; 2898 rend = rowrange[recv_rank[imdex]+1]; 2899 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2900 rptr = rbuf_j[imdex]; 2901 cols = rbuf_j[imdex] + rend-rstart + 1; 2902 vals = rbuf_a[imdex]; 2903 for (i=0; i<rend-rstart; i++) { 2904 row = i + rstart; 2905 ncols = rptr[i+1] - rptr[i]; 2906 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2907 vals += ncols; 2908 cols += ncols; 2909 } 2910 } 2911 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2912 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2913 2914 if (reuse == MAT_INITIAL_MATRIX) { 2915 *matredundant = C; 2916 2917 /* create a supporting struct and attach it to C for reuse */ 2918 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2919 if (subsize == 1) { 2920 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2921 c->redundant = redund; 2922 } else { 2923 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2924 c->redundant = redund; 2925 } 2926 2927 redund->nzlocal = nzlocal; 2928 redund->nsends = nsends; 2929 redund->nrecvs = nrecvs; 2930 redund->send_rank = send_rank; 2931 redund->recv_rank = recv_rank; 2932 redund->sbuf_nz = sbuf_nz; 2933 redund->rbuf_nz = rbuf_nz; 2934 redund->sbuf_j = sbuf_j; 2935 redund->sbuf_a = sbuf_a; 2936 redund->rbuf_j = rbuf_j; 2937 redund->rbuf_a = rbuf_a; 2938 redund->psubcomm = NULL; 2939 2940 redund->Destroy = C->ops->destroy; 2941 C->ops->destroy = MatDestroy_MatRedundant; 2942 } 2943 PetscFunctionReturn(0); 2944 } 2945 2946 #undef __FUNCT__ 2947 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2948 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2949 { 2950 PetscErrorCode ierr; 2951 2952 PetscFunctionBegin; 2953 /* Only MatGetRedundantMatrix_MPIAIJ_interlaced() is written now */ 2954 if (subcomm == MPI_COMM_NULL || subcomm == PETSC_COMM_SELF) { /* create psubcomm */ 2955 MPI_Comm comm; 2956 PetscSubcomm psubcomm; 2957 PetscMPIInt size,subsize; 2958 PetscInt type=2; 2959 PetscBool flg=PETSC_FALSE; 2960 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2961 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2962 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2963 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2964 2965 ierr = 
PetscOptionsGetInt(NULL,"-subcomm_type",&type,NULL);CHKERRQ(ierr);
2966     if (type == 2) {
2967       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_INTERLACED);CHKERRQ(ierr);
2968     } else {
2969       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2970     }
2971     ierr = PetscOptionsHasName(NULL, "-psubcomm_view", &flg);CHKERRQ(ierr);
2972     if (flg) {
2973       ierr = PetscSubcommView(psubcomm,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
2974     }
2975
2976     ierr = MatGetRedundantMatrix_MPIAIJ_psubcomm(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr);
2977
2978     /* free psubcomm in MatDestroy_MatRedundant() */
2979     ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2980     Mat C = *matredundant;
2981     if (subsize == 1) {
2982       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2983       c->redundant->psubcomm = psubcomm;
2984     } else {
2985       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2986       c->redundant->psubcomm = psubcomm;
2987     }
2988   } else {
2989     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet");
2990   }
2991   PetscFunctionReturn(0);
2992 }
2993
2994 #undef __FUNCT__
2995 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2996 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2997 {
2998   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2999   PetscErrorCode ierr;
3000   PetscInt       i,*idxb = 0;
3001   PetscScalar    *va,*vb;
3002   Vec            vtmp;
3003
3004   PetscFunctionBegin;
3005   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
3006   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3007   if (idx) {
3008     for (i=0; i<A->rmap->n; i++) {
3009       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3010     }
3011   }
3012
3013   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3014   if (idx) {
3015     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3016   }
3017   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3018   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3019
3020   for (i=0; i<A->rmap->n; i++) {
3021     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
3022       va[i] = vb[i];
3023       if (idx) idx[i] = a->garray[idxb[i]];
3024     }
3025   }
3026
3027   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3028   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3029   ierr = PetscFree(idxb);CHKERRQ(ierr);
3030   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3031   PetscFunctionReturn(0);
3032 }
3033
3034 #undef __FUNCT__
3035 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
3036 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3037 {
3038   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3039   PetscErrorCode ierr;
3040   PetscInt       i,*idxb = 0;
3041   PetscScalar    *va,*vb;
3042   Vec            vtmp;
3043
3044   PetscFunctionBegin;
3045   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
3046   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3047   if (idx) {
3048     for (i=0; i<A->rmap->n; i++) {
3049       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3050     }
3051   }
3052
3053   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3054   if (idx) {
3055     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3056   }
3057   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3058   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3059
3060   for (i=0; i<A->rmap->n; i++) {
3061     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
3062       va[i] = vb[i];
3063       if (idx) idx[i] = a->garray[idxb[i]];
3064     }
3065   }
3066
3067   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3068   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3069   ierr = PetscFree(idxb);CHKERRQ(ierr);
3070   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3071   PetscFunctionReturn(0);
3072 }
3073
3074 #undef __FUNCT__
3075 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
3076 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3077 {
3078   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3079   PetscInt       n      = A->rmap->n;
3080   PetscInt       cstart = A->cmap->rstart;
3081   PetscInt       *cmap  = mat->garray;
3082   PetscInt       *diagIdx, *offdiagIdx;
3083   Vec            diagV, offdiagV;
3084   PetscScalar    *a, *diagA, *offdiagA;
3085   PetscInt       r;
3086   PetscErrorCode ierr;
3087
3088   PetscFunctionBegin;
3089   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3090   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3091   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3092   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
3093   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3094   ierr = VecGetArray(v, &a);CHKERRQ(ierr);
3095   ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
3096   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3097   for (r = 0; r < n; ++r) {
3098     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3099       a[r]   = diagA[r];
3100       idx[r] = cstart + diagIdx[r];
3101     } else {
3102       a[r]   = offdiagA[r];
3103       idx[r] = cmap[offdiagIdx[r]];
3104     }
3105   }
3106   ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
3107   ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
3108   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3109   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3110   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3111   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3112   PetscFunctionReturn(0);
3113 }
3114
3115 #undef __FUNCT__
3116 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3117 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3118 {
3119   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3120   PetscInt       n      = A->rmap->n;
3121   PetscInt       cstart = A->cmap->rstart;
3122   PetscInt       *cmap  = mat->garray;
3123   PetscInt       *diagIdx, *offdiagIdx;
3124   Vec            diagV, offdiagV;
3125   PetscScalar    *a, *diagA, *offdiagA;
3126   PetscInt       r;
3127   PetscErrorCode ierr;
3128
3129   PetscFunctionBegin;
3130   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3131   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3132   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3133   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
3134   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3135   ierr = VecGetArray(v, &a);CHKERRQ(ierr);
3136   ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
3137   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3138   for (r = 0; r < n; ++r) {
3139     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3140       a[r]   = diagA[r];
3141       idx[r] = cstart + diagIdx[r];
3142     } else {
3143       a[r]   = offdiagA[r];
3144       idx[r] = cmap[offdiagIdx[r]];
3145     }
3146   }
3147   ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
3148   ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
3149   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3150   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3151   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3152   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3153   PetscFunctionReturn(0);
3154 }
3155
3156 #undef __FUNCT__
3157 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3158 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3159 {
3160   PetscErrorCode ierr;
3161   Mat            *dummy;
3162
3163   PetscFunctionBegin;
3164   ierr =
MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3165 *newmat = *dummy; 3166 ierr = PetscFree(dummy);CHKERRQ(ierr); 3167 PetscFunctionReturn(0); 3168 } 3169 3170 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3171 3172 #undef __FUNCT__ 3173 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3174 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3175 { 3176 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3177 PetscErrorCode ierr; 3178 3179 PetscFunctionBegin; 3180 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3181 PetscFunctionReturn(0); 3182 } 3183 3184 #undef __FUNCT__ 3185 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3186 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3187 { 3188 PetscErrorCode ierr; 3189 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3190 3191 PetscFunctionBegin; 3192 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3193 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3194 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3195 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3196 PetscFunctionReturn(0); 3197 } 3198 3199 /* -------------------------------------------------------------------*/ 3200 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3201 MatGetRow_MPIAIJ, 3202 MatRestoreRow_MPIAIJ, 3203 MatMult_MPIAIJ, 3204 /* 4*/ MatMultAdd_MPIAIJ, 3205 MatMultTranspose_MPIAIJ, 3206 MatMultTransposeAdd_MPIAIJ, 3207 #if defined(PETSC_HAVE_PBGL) 3208 MatSolve_MPIAIJ, 3209 #else 3210 0, 3211 #endif 3212 0, 3213 0, 3214 /*10*/ 0, 3215 0, 3216 0, 3217 MatSOR_MPIAIJ, 3218 MatTranspose_MPIAIJ, 3219 /*15*/ MatGetInfo_MPIAIJ, 3220 MatEqual_MPIAIJ, 3221 MatGetDiagonal_MPIAIJ, 3222 MatDiagonalScale_MPIAIJ, 3223 MatNorm_MPIAIJ, 3224 /*20*/ MatAssemblyBegin_MPIAIJ, 3225 MatAssemblyEnd_MPIAIJ, 3226 MatSetOption_MPIAIJ, 3227 MatZeroEntries_MPIAIJ, 3228 /*24*/ MatZeroRows_MPIAIJ, 3229 0, 3230 #if defined(PETSC_HAVE_PBGL) 3231 0, 3232 #else 3233 0, 3234 #endif 3235 0, 3236 0, 3237 /*29*/ MatSetUp_MPIAIJ, 3238 #if defined(PETSC_HAVE_PBGL) 3239 0, 3240 #else 3241 0, 3242 #endif 3243 0, 3244 0, 3245 0, 3246 /*34*/ MatDuplicate_MPIAIJ, 3247 0, 3248 0, 3249 0, 3250 0, 3251 /*39*/ MatAXPY_MPIAIJ, 3252 MatGetSubMatrices_MPIAIJ, 3253 MatIncreaseOverlap_MPIAIJ, 3254 MatGetValues_MPIAIJ, 3255 MatCopy_MPIAIJ, 3256 /*44*/ MatGetRowMax_MPIAIJ, 3257 MatScale_MPIAIJ, 3258 0, 3259 0, 3260 MatZeroRowsColumns_MPIAIJ, 3261 /*49*/ MatSetRandom_MPIAIJ, 3262 0, 3263 0, 3264 0, 3265 0, 3266 /*54*/ MatFDColoringCreate_MPIAIJ, 3267 0, 3268 MatSetUnfactored_MPIAIJ, 3269 MatPermute_MPIAIJ, 3270 0, 3271 /*59*/ MatGetSubMatrix_MPIAIJ, 3272 MatDestroy_MPIAIJ, 3273 MatView_MPIAIJ, 3274 0, 3275 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3276 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3277 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3278 0, 3279 0, 3280 0, 3281 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3282 MatGetRowMinAbs_MPIAIJ, 3283 0, 3284 MatSetColoring_MPIAIJ, 3285 0, 3286 MatSetValuesAdifor_MPIAIJ, 3287 /*75*/ MatFDColoringApply_AIJ, 3288 0, 3289 0, 3290 0, 3291 MatFindZeroDiagonals_MPIAIJ, 3292 /*80*/ 0, 3293 0, 3294 0, 3295 /*83*/ MatLoad_MPIAIJ, 3296 0, 3297 0, 3298 0, 3299 0, 3300 0, 3301 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3302 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3303 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3304 MatPtAP_MPIAIJ_MPIAIJ, 3305 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3306 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3307 0, 3308 0, 3309 0, 3310 0, 3311 /*99*/ 0, 3312 0, 3313 0, 3314 
MatConjugate_MPIAIJ, 3315 0, 3316 /*104*/MatSetValuesRow_MPIAIJ, 3317 MatRealPart_MPIAIJ, 3318 MatImaginaryPart_MPIAIJ, 3319 0, 3320 0, 3321 /*109*/0, 3322 MatGetRedundantMatrix_MPIAIJ, 3323 MatGetRowMin_MPIAIJ, 3324 0, 3325 0, 3326 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3327 0, 3328 0, 3329 0, 3330 0, 3331 /*119*/0, 3332 0, 3333 0, 3334 0, 3335 MatGetMultiProcBlock_MPIAIJ, 3336 /*124*/MatFindNonzeroRows_MPIAIJ, 3337 MatGetColumnNorms_MPIAIJ, 3338 MatInvertBlockDiagonal_MPIAIJ, 3339 0, 3340 MatGetSubMatricesParallel_MPIAIJ, 3341 /*129*/0, 3342 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3343 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3344 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3345 0, 3346 /*134*/0, 3347 0, 3348 0, 3349 0, 3350 0, 3351 /*139*/0, 3352 0 3353 }; 3354 3355 /* ----------------------------------------------------------------------------------------*/ 3356 3357 #undef __FUNCT__ 3358 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3359 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3360 { 3361 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3362 PetscErrorCode ierr; 3363 3364 PetscFunctionBegin; 3365 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3366 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3367 PetscFunctionReturn(0); 3368 } 3369 3370 #undef __FUNCT__ 3371 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3372 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3373 { 3374 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3375 PetscErrorCode ierr; 3376 3377 PetscFunctionBegin; 3378 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3379 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3380 PetscFunctionReturn(0); 3381 } 3382 3383 #undef __FUNCT__ 3384 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3385 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3386 { 3387 Mat_MPIAIJ *b; 3388 PetscErrorCode ierr; 3389 3390 PetscFunctionBegin; 3391 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3392 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3393 b = (Mat_MPIAIJ*)B->data; 3394 3395 if (!B->preallocated) { 3396 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3397 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3398 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3399 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3400 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3401 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3402 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3403 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3404 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3405 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3406 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3407 } 3408 3409 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3410 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3411 B->preallocated = PETSC_TRUE; 3412 PetscFunctionReturn(0); 3413 } 3414 3415 #undef __FUNCT__ 3416 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3417 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3418 { 3419 Mat mat; 3420 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3421 PetscErrorCode ierr; 3422 3423 PetscFunctionBegin; 3424 *newmat = 0; 3425 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3426 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3427 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3428 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3429 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3430 a = (Mat_MPIAIJ*)mat->data; 3431 3432 mat->factortype = matin->factortype; 3433 mat->rmap->bs = matin->rmap->bs; 3434 mat->cmap->bs = matin->cmap->bs; 3435 mat->assembled = PETSC_TRUE; 3436 mat->insertmode = NOT_SET_VALUES; 3437 mat->preallocated = PETSC_TRUE; 3438 3439 a->size = oldmat->size; 3440 a->rank = oldmat->rank; 3441 a->donotstash = oldmat->donotstash; 3442 a->roworiented = oldmat->roworiented; 3443 a->rowindices = 0; 3444 a->rowvalues = 0; 3445 a->getrowactive = PETSC_FALSE; 3446 3447 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3448 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3449 3450 if (oldmat->colmap) { 3451 #if defined(PETSC_USE_CTABLE) 3452 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3453 #else 3454 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3455 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3456 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3457 #endif 3458 } else a->colmap = 0; 3459 if (oldmat->garray) { 3460 PetscInt len; 3461 len = oldmat->B->cmap->n; 3462 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3463 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3464 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3465 } else a->garray = 0; 3466 3467 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3468 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3469 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3470 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3471 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3472 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3473 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3474 ierr = 
PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3475 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3476 *newmat = mat; 3477 PetscFunctionReturn(0); 3478 } 3479 3480 3481 3482 #undef __FUNCT__ 3483 #define __FUNCT__ "MatLoad_MPIAIJ" 3484 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3485 { 3486 PetscScalar *vals,*svals; 3487 MPI_Comm comm; 3488 PetscErrorCode ierr; 3489 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3490 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3491 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3492 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3493 PetscInt cend,cstart,n,*rowners,sizesset=1; 3494 int fd; 3495 PetscInt bs = 1; 3496 3497 PetscFunctionBegin; 3498 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3499 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3500 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3501 if (!rank) { 3502 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3503 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3504 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3505 } 3506 3507 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3508 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3509 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3510 3511 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3512 3513 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3514 M = header[1]; N = header[2]; 3515 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3516 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3517 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3518 3519 /* If global sizes are set, check if they are consistent with that given in the file */ 3520 if (sizesset) { 3521 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3522 } 3523 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3524 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3525 3526 /* determine ownership of all (block) rows */ 3527 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3528 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3529 else m = newMat->rmap->n; /* Set by user */ 3530 3531 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3532 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3533 3534 /* First process needs enough room for process with most rows */ 3535 if (!rank) { 3536 mmax = rowners[1]; 3537 for (i=2; i<=size; i++) { 3538 mmax = PetscMax(mmax, rowners[i]); 3539 } 3540 } else mmax = -1; /* unused, but compilers complain */ 3541 3542 rowners[0] = 0; 3543 for (i=2; i<=size; i++) { 3544 rowners[i] += rowners[i-1]; 3545 } 3546 rstart = rowners[rank]; 3547 rend = rowners[rank+1]; 3548 3549 /* distribute row lengths to all processors */ 3550 ierr = 
PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3551 if (!rank) { 3552 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3553 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3554 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3555 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3556 for (j=0; j<m; j++) { 3557 procsnz[0] += ourlens[j]; 3558 } 3559 for (i=1; i<size; i++) { 3560 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3561 /* calculate the number of nonzeros on each processor */ 3562 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3563 procsnz[i] += rowlengths[j]; 3564 } 3565 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3566 } 3567 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3568 } else { 3569 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3570 } 3571 3572 if (!rank) { 3573 /* determine max buffer needed and allocate it */ 3574 maxnz = 0; 3575 for (i=0; i<size; i++) { 3576 maxnz = PetscMax(maxnz,procsnz[i]); 3577 } 3578 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3579 3580 /* read in my part of the matrix column indices */ 3581 nz = procsnz[0]; 3582 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3583 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3584 3585 /* read in every one elses and ship off */ 3586 for (i=1; i<size; i++) { 3587 nz = procsnz[i]; 3588 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3589 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3590 } 3591 ierr = PetscFree(cols);CHKERRQ(ierr); 3592 } else { 3593 /* determine buffer space needed for message */ 3594 nz = 0; 3595 for (i=0; i<m; i++) { 3596 nz += ourlens[i]; 3597 } 3598 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3599 3600 /* receive message of column indices*/ 3601 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3602 } 3603 3604 /* determine column ownership if matrix is not square */ 3605 if (N != M) { 3606 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3607 else n = newMat->cmap->n; 3608 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3609 cstart = cend - n; 3610 } else { 3611 cstart = rstart; 3612 cend = rend; 3613 n = cend - cstart; 3614 } 3615 3616 /* loop over local rows, determining number of off diagonal entries */ 3617 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3618 jj = 0; 3619 for (i=0; i<m; i++) { 3620 for (j=0; j<ourlens[i]; j++) { 3621 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3622 jj++; 3623 } 3624 } 3625 3626 for (i=0; i<m; i++) { 3627 ourlens[i] -= offlens[i]; 3628 } 3629 if (!sizesset) { 3630 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3631 } 3632 3633 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3634 3635 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3636 3637 for (i=0; i<m; i++) { 3638 ourlens[i] += offlens[i]; 3639 } 3640 3641 if (!rank) { 3642 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3643 3644 /* read in my part of the matrix numerical values */ 3645 nz = procsnz[0]; 3646 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3647 3648 /* insert into matrix */ 3649 jj = rstart; 3650 smycols = mycols; 3651 svals = vals; 3652 for (i=0; i<m; i++) { 3653 ierr = 
MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3654 smycols += ourlens[i]; 3655 svals += ourlens[i]; 3656 jj++; 3657 } 3658 3659 /* read in other processors and ship out */ 3660 for (i=1; i<size; i++) { 3661 nz = procsnz[i]; 3662 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3663 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3664 } 3665 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3666 } else { 3667 /* receive numeric values */ 3668 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3669 3670 /* receive message of values*/ 3671 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3672 3673 /* insert into matrix */ 3674 jj = rstart; 3675 smycols = mycols; 3676 svals = vals; 3677 for (i=0; i<m; i++) { 3678 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3679 smycols += ourlens[i]; 3680 svals += ourlens[i]; 3681 jj++; 3682 } 3683 } 3684 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3685 ierr = PetscFree(vals);CHKERRQ(ierr); 3686 ierr = PetscFree(mycols);CHKERRQ(ierr); 3687 ierr = PetscFree(rowners);CHKERRQ(ierr); 3688 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3689 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3690 PetscFunctionReturn(0); 3691 } 3692 3693 #undef __FUNCT__ 3694 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3695 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3696 { 3697 PetscErrorCode ierr; 3698 IS iscol_local; 3699 PetscInt csize; 3700 3701 PetscFunctionBegin; 3702 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3703 if (call == MAT_REUSE_MATRIX) { 3704 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3705 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3706 } else { 3707 PetscInt cbs; 3708 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3709 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3710 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3711 } 3712 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3713 if (call == MAT_INITIAL_MATRIX) { 3714 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3715 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3716 } 3717 PetscFunctionReturn(0); 3718 } 3719 3720 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3721 #undef __FUNCT__ 3722 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3723 /* 3724 Not great since it makes two copies of the submatrix, first an SeqAIJ 3725 in local and then by concatenating the local matrices the end result. 3726 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3727 3728 Note: This requires a sequential iscol with all indices. 
3729 */ 3730 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3731 { 3732 PetscErrorCode ierr; 3733 PetscMPIInt rank,size; 3734 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3735 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3736 PetscBool allcolumns, colflag; 3737 Mat M,Mreuse; 3738 MatScalar *vwork,*aa; 3739 MPI_Comm comm; 3740 Mat_SeqAIJ *aij; 3741 3742 PetscFunctionBegin; 3743 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3744 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3745 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3746 3747 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3748 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3749 if (colflag && ncol == mat->cmap->N) { 3750 allcolumns = PETSC_TRUE; 3751 } else { 3752 allcolumns = PETSC_FALSE; 3753 } 3754 if (call == MAT_REUSE_MATRIX) { 3755 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3756 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3757 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3758 } else { 3759 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3760 } 3761 3762 /* 3763 m - number of local rows 3764 n - number of columns (same on all processors) 3765 rstart - first row in new global matrix generated 3766 */ 3767 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3768 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3769 if (call == MAT_INITIAL_MATRIX) { 3770 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3771 ii = aij->i; 3772 jj = aij->j; 3773 3774 /* 3775 Determine the number of non-zeros in the diagonal and off-diagonal 3776 portions of the matrix in order to do correct preallocation 3777 */ 3778 3779 /* first get start and end of "diagonal" columns */ 3780 if (csize == PETSC_DECIDE) { 3781 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3782 if (mglobal == n) { /* square matrix */ 3783 nlocal = m; 3784 } else { 3785 nlocal = n/size + ((n % size) > rank); 3786 } 3787 } else { 3788 nlocal = csize; 3789 } 3790 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3791 rstart = rend - nlocal; 3792 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3793 3794 /* next, compute all the lengths */ 3795 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3796 olens = dlens + m; 3797 for (i=0; i<m; i++) { 3798 jend = ii[i+1] - ii[i]; 3799 olen = 0; 3800 dlen = 0; 3801 for (j=0; j<jend; j++) { 3802 if (*jj < rstart || *jj >= rend) olen++; 3803 else dlen++; 3804 jj++; 3805 } 3806 olens[i] = olen; 3807 dlens[i] = dlen; 3808 } 3809 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3810 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3811 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3812 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3813 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3814 ierr = PetscFree(dlens);CHKERRQ(ierr); 3815 } else { 3816 PetscInt ml,nl; 3817 3818 M = *newmat; 3819 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3820 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3821 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3822 /* 
3823     The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3824     rather than the slower MatSetValues().
3825     */
3826     M->was_assembled = PETSC_TRUE;
3827     M->assembled     = PETSC_FALSE;
3828   }
3829   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3830   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3831   ii   = aij->i;
3832   jj   = aij->j;
3833   aa   = aij->a;
3834   for (i=0; i<m; i++) {
3835     row = rstart + i;
3836     nz  = ii[i+1] - ii[i];
3837     cwork = jj; jj += nz;
3838     vwork = aa; aa += nz;
3839     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3840   }
3841
3842   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3843   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3844   *newmat = M;
3845
3846   /* save submatrix used in processor for next request */
3847   if (call == MAT_INITIAL_MATRIX) {
3848     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3849     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3850   }
3851   PetscFunctionReturn(0);
3852 }
3853
3854 #undef __FUNCT__
3855 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3856 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3857 {
3858   PetscInt       m,cstart,cend,j,nnz,i,d;
3859   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3860   const PetscInt *JJ;
3861   PetscScalar    *values;
3862   PetscErrorCode ierr;
3863
3864   PetscFunctionBegin;
3865   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3866
3867   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3868   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3869   m      = B->rmap->n;
3870   cstart = B->cmap->rstart;
3871   cend   = B->cmap->rend;
3872   rstart = B->rmap->rstart;
3873
3874   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3875
3876 #if defined(PETSC_USE_DEBUG)
3877   for (i=0; i<m; i++) {
3878     nnz = Ii[i+1]- Ii[i];
3879     JJ  = J + Ii[i];
3880     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3881     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3882     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3883   }
3884 #endif
3885
3886   for (i=0; i<m; i++) {
3887     nnz     = Ii[i+1]- Ii[i];
3888     JJ      = J + Ii[i];
3889     nnz_max = PetscMax(nnz_max,nnz);
3890     d       = 0;
3891     for (j=0; j<nnz; j++) {
3892       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3893     }
3894     d_nnz[i] = d;
3895     o_nnz[i] = nnz - d;
3896   }
3897   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3898   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3899
3900   if (v) values = (PetscScalar*)v;
3901   else {
3902     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3903     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3904   }
3905
3906   for (i=0; i<m; i++) {
3907     ii   = i + rstart;
3908     nnz  = Ii[i+1]- Ii[i];
3909     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3910   }
3911   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3912   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3913
3914   if (!v) {
3915     ierr = PetscFree(values);CHKERRQ(ierr);
3916   }
3917   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3918   PetscFunctionReturn(0);
3919 }
3920
3921 #undef __FUNCT__
3922 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3923 /*@
3924    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3925    (the default parallel PETSc format).
3926
3927    Collective on MPI_Comm
3928
3929    Input Parameters:
3930 +  B - the matrix
3931 .  i - the indices into j for the start of each local row (starts with zero)
3932 .  j - the column indices for each local row (starts with zero)
3933 -  v - optional values in the matrix
3934
3935    Level: developer
3936
3937    Notes:
3938        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3939      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3940      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3941
3942        The i and j indices are 0 based, and the entries of i are offsets into the local j (and v) arrays.
3943
3944        The format used for the sparse matrix input is equivalent to a
3945     row-major ordering, i.e., for the following matrix, the input data expected is
3946     as shown:
3947
3948         1 0 0
3949         2 0 3     P0
3950        -------
3951         4 5 6     P1
3952
3953      Process0 [P0]: rows_owned=[0,1]
3954         i =  {0,1,3}  [size = nrow+1 = 2+1]
3955         j =  {0,0,2}  [size = nz = 3]
3956         v =  {1,2,3}  [size = nz = 3]
3957
3958      Process1 [P1]: rows_owned=[2]
3959         i =  {0,3}    [size = nrow+1 = 1+1]
3960         j =  {0,1,2}  [size = nz = 3]
3961         v =  {4,5,6}  [size = nz = 3]
3962
3963 .keywords: matrix, aij, compressed row, sparse, parallel
3964
3965 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3966           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3967 @*/
3968 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3969 {
3970   PetscErrorCode ierr;
3971
3972   PetscFunctionBegin;
3973   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3974   PetscFunctionReturn(0);
3975 }
3976
3977 #undef __FUNCT__
3978 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3979 /*@C
3980    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3981    (the default parallel PETSc format).  For good matrix assembly performance
3982    the user should preallocate the matrix storage by setting the parameters
3983    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3984    performance can be increased by more than a factor of 50.
3985
3986    Collective on MPI_Comm
3987
3988    Input Parameters:
3989 +  B - the matrix
3990 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3991            (same value is used for all local rows)
3992 .  d_nnz - array containing the number of nonzeros in the various rows of the
3993            DIAGONAL portion of the local submatrix (possibly different for each row)
3994            or NULL, if d_nz is used to specify the nonzero structure.
3995            The size of this array is equal to the number of local rows, i.e., 'm'.
3996            For matrices that will be factored, you must leave room for (and set)
3997            the diagonal entry even if it is zero.
3998 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3999            submatrix (same value is used for all local rows).
4000 -  o_nnz - array containing the number of nonzeros in the various rows of the
4001            OFF-DIAGONAL portion of the local submatrix (possibly different for
4002            each row) or NULL, if o_nz is used to specify the nonzero
4003            structure. The size of this array is equal to the number
4004            of local rows, i.e., 'm'.
4005
4006    If the *_nnz parameter is given then the *_nz parameter is ignored
4007
4008    The AIJ format (also called the Yale sparse matrix format or
4009    compressed row storage (CSR)) is fully compatible with standard Fortran 77
4010    storage.  The stored row and column indices begin with zero.
4011    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
4012
4013    The parallel matrix is partitioned such that the first m0 rows belong to
4014    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4015    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4016
4017    The DIAGONAL portion of the local submatrix of a processor can be defined
4018    as the submatrix which is obtained by extracting the part corresponding to
4019    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4020    first row that belongs to the processor, r2 is the last row belonging to
4021    this processor, and c1-c2 is the range of indices of the local part of a
4022    vector suitable for applying the matrix to. This is an mxn matrix.  In the
4023    common case of a square matrix, the row and column ranges are the same and
4024    the DIAGONAL part is also square. The remaining portion of the local
4025    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4026
4027    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4028
4029    You can call MatGetInfo() to get information on how effective the preallocation was,
4030    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded.
4031    You can also run with the option -info and look for messages with the string
4032    malloc in them to see if additional memory allocation was needed.
4033
4034    Example usage:
4035
4036    Consider the following 8x8 matrix with 34 non-zero values, that is
4037    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4038    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4039    as follows:
4040
4041 .vb
4042             1  2  0  |  0  3  0  |  0  4
4043     Proc0   0  5  6  |  7  0  0  |  8  0
4044             9  0 10  | 11  0  0  | 12  0
4045     -------------------------------------
4046            13  0 14  | 15 16 17  |  0  0
4047     Proc1   0 18  0  | 19 20 21  |  0  0
4048             0  0  0  | 22 23  0  | 24  0
4049     -------------------------------------
4050     Proc2  25 26 27  |  0  0 28  | 29  0
4051            30  0  0  | 31 32 33  |  0 34
4052 .ve
4053
4054    This can be represented as a collection of submatrices as:
4055
4056 .vb
4057       A B C
4058       D E F
4059       G H I
4060 .ve
4061
4062    Where the submatrices A,B,C are owned by proc0, D,E,F are
4063    owned by proc1, G,H,I are owned by proc2.
4064
4065    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4066    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4067    The 'M','N' parameters are 8,8, and have the same values on all procs.
4068
4069    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4070    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4071    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4072    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4073    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4074    matrix, and [DF] as another SeqAIJ matrix.
4075
4076    When d_nz, o_nz parameters are specified, d_nz storage elements are
4077    allocated for every row of the local diagonal submatrix, and o_nz
4078    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4079    One way to choose d_nz and o_nz is to use the max nonzeros per local
4080    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4081    In this case, the values of d_nz,o_nz are:
4082 .vb
4083      proc0 : d_nz = 2, o_nz = 2
4084      proc1 : d_nz = 3, o_nz = 2
4085      proc2 : d_nz = 1, o_nz = 4
4086 .ve
4087    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4088    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4089    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4090    34 values.
4091
4092    When d_nnz, o_nnz parameters are specified, the storage is specified
4093    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4094    In the above case the values for d_nnz,o_nnz are:
4095 .vb
4096      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4097      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4098      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4099 .ve
4100    Here the space allocated is the sum of all the above values, i.e., 34, and
4101    hence the preallocation is perfect.
4102
4103    Level: intermediate
4104
4105 .keywords: matrix, aij, compressed row, sparse, parallel
4106
4107 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4108           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4109 @*/
4110 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4111 {
4112   PetscErrorCode ierr;
4113
4114   PetscFunctionBegin;
4115   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4116   PetscValidType(B,1);
4117   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4118   PetscFunctionReturn(0);
4119 }
4120
4121 #undef __FUNCT__
4122 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4123 /*@
4124      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4125        rows in standard CSR format.
4126
4127    Collective on MPI_Comm
4128
4129    Input Parameters:
4130 +  comm - MPI communicator
4131 .  m - number of local rows (Cannot be PETSC_DECIDE)
4132 .  n - This value should be the same as the local size used in creating the
4133        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4134        calculated if N is given) For square matrices n is almost always m.
4135 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4136 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4137 .  i - row indices
4138 .  j - column indices
4139 -  a - matrix values
4140
4141    Output Parameter:
4142 .  mat - the matrix
4143
4144    Level: intermediate
4145
4146    Notes:
4147        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4148      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4149      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
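
       A minimal calling sketch, using the process-0 CSR data from the example below (illustrative
     only; the array names and sizes here are hypothetical placeholders taken from that example,
     each process passes its own local rows, and error checking is omitted). Because the arrays
     are copied, they may be freed or reused as soon as the routine returns:

.vb
     Mat               A;
     const PetscInt    ii[] = {0,1,3};        /* row offsets for the 2 local rows on P0 */
     const PetscInt    jj[] = {0,0,2};        /* global column indices */
     const PetscScalar aa[] = {1.0,2.0,3.0};  /* values matching jj[] */

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,ii,jj,aa,&A);
     /* ii, jj, aa may now be modified or freed without affecting A */
     MatDestroy(&A);
.ve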
4150
4151        The i and j indices are 0 based, and the entries of i are offsets into the local j (and a) arrays.
4152
4153        The format used for the sparse matrix input is equivalent to a
4154     row-major ordering, i.e., for the following matrix, the input data expected is
4155     as shown:
4156
4157         1 0 0
4158         2 0 3     P0
4159        -------
4160         4 5 6     P1
4161
4162      Process0 [P0]: rows_owned=[0,1]
4163         i =  {0,1,3}  [size = nrow+1 = 2+1]
4164         j =  {0,0,2}  [size = nz = 3]
4165         v =  {1,2,3}  [size = nz = 3]
4166
4167      Process1 [P1]: rows_owned=[2]
4168         i =  {0,3}    [size = nrow+1 = 1+1]
4169         j =  {0,1,2}  [size = nz = 3]
4170         v =  {4,5,6}  [size = nz = 3]
4171
4172 .keywords: matrix, aij, compressed row, sparse, parallel
4173
4174 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4175           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4176 @*/
4177 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4178 {
4179   PetscErrorCode ierr;
4180
4181   PetscFunctionBegin;
4182   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4183   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4184   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4185   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4186   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4187   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4188   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4189   PetscFunctionReturn(0);
4190 }
4191
4192 #undef __FUNCT__
4193 #define __FUNCT__ "MatCreateAIJ"
4194 /*@C
4195    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4196    (the default parallel PETSc format).  For good matrix assembly performance
4197    the user should preallocate the matrix storage by setting the parameters
4198    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4199    performance can be increased by more than a factor of 50.
4200
4201    Collective on MPI_Comm
4202
4203    Input Parameters:
4204 +  comm - MPI communicator
4205 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4206        This value should be the same as the local size used in creating the
4207        y vector for the matrix-vector product y = Ax.
4208 .  n - This value should be the same as the local size used in creating the
4209        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4210        calculated if N is given) For square matrices n is almost always m.
4211 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4212 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4213 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4214            (same value is used for all local rows)
4215 .  d_nnz - array containing the number of nonzeros in the various rows of the
4216            DIAGONAL portion of the local submatrix (possibly different for each row)
4217            or NULL, if d_nz is used to specify the nonzero structure.
4218            The size of this array is equal to the number of local rows, i.e., 'm'.
4219 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4220            submatrix (same value is used for all local rows).
4221 -  o_nnz - array containing the number of nonzeros in the various rows of the
4222            OFF-DIAGONAL portion of the local submatrix (possibly different for
4223            each row) or NULL, if o_nz is used to specify the nonzero
4224            structure. The size of this array is equal to the number
4225            of local rows, i.e., 'm'.
4226
4227    Output Parameter:
4228 .  A - the matrix
4229
4230    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4231    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4232    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4233
4234    Notes:
4235    If the *_nnz parameter is given then the *_nz parameter is ignored
4236
4237    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4238    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4239    storage requirements for this matrix.
4240
4241    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4242    processor then it must be used on all processors that share the object for
4243    that argument.
4244
4245    The user MUST specify either the local or global matrix dimensions
4246    (possibly both).
4247
4248    The parallel matrix is partitioned across processors such that the
4249    first m0 rows belong to process 0, the next m1 rows belong to
4250    process 1, the next m2 rows belong to process 2, etc., where
4251    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4252    values corresponding to an [m x N] submatrix.
4253
4254    The columns are logically partitioned with the n0 columns belonging
4255    to the 0th partition, the next n1 columns belonging to the next
4256    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4257
4258    The DIAGONAL portion of the local submatrix on any given processor
4259    is the submatrix corresponding to the rows and columns m,n
4260    corresponding to the given processor, i.e., the diagonal matrix on
4261    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
4262    etc. The remaining portion of the local submatrix [m x (N-n)]
4263    constitutes the OFF-DIAGONAL portion. The example below better
4264    illustrates this concept.
4265
4266    For a square global matrix we define each processor's diagonal portion
4267    to be its local rows and the corresponding columns (a square submatrix);
4268    each processor's off-diagonal portion encompasses the remainder of the
4269    local matrix (a rectangular submatrix).
4270
4271    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4272
4273    When calling this routine with a single process communicator, a matrix of
4274    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4275    type of communicator, use the construction mechanism:
4276      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4277
4278    By default, this format uses inodes (identical nodes) when possible.
4279    We search for consecutive rows with the same nonzero structure, thereby
4280    reusing matrix information to achieve increased efficiency.
4281
4282    Options Database Keys:
4283 +  -mat_no_inode  - Do not use inodes
4284 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4285 -  -mat_aij_oneindex - Internally use indexing starting at 1
4286         rather than 0.  Note that when calling MatSetValues(),
4287         the user still MUST index entries starting at 0!
4288
4289
4290    Example usage:
4291
4292    Consider the following 8x8 matrix with 34 non-zero values, that is
4293    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4294    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4295    as follows:
4296
4297 .vb
4298             1  2  0  |  0  3  0  |  0  4
4299     Proc0   0  5  6  |  7  0  0  |  8  0
4300             9  0 10  | 11  0  0  | 12  0
4301     -------------------------------------
4302            13  0 14  | 15 16 17  |  0  0
4303     Proc1   0 18  0  | 19 20 21  |  0  0
4304             0  0  0  | 22 23  0  | 24  0
4305     -------------------------------------
4306     Proc2  25 26 27  |  0  0 28  | 29  0
4307            30  0  0  | 31 32 33  |  0 34
4308 .ve
4309
4310    This can be represented as a collection of submatrices as:
4311
4312 .vb
4313       A B C
4314       D E F
4315       G H I
4316 .ve
4317
4318    Where the submatrices A,B,C are owned by proc0, D,E,F are
4319    owned by proc1, G,H,I are owned by proc2.
4320
4321    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4322    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4323    The 'M','N' parameters are 8,8, and have the same values on all procs.
4324
4325    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4326    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4327    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4328    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4329    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4330    matrix, and [DF] as another SeqAIJ matrix.
4331
4332    When d_nz, o_nz parameters are specified, d_nz storage elements are
4333    allocated for every row of the local diagonal submatrix, and o_nz
4334    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4335    One way to choose d_nz and o_nz is to use the max nonzeros per local
4336    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4337    In this case, the values of d_nz,o_nz are:
4338 .vb
4339      proc0 : d_nz = 2, o_nz = 2
4340      proc1 : d_nz = 3, o_nz = 2
4341      proc2 : d_nz = 1, o_nz = 4
4342 .ve
4343    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4344    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4345    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4346    34 values.
4347
4348    When d_nnz, o_nnz parameters are specified, the storage is specified
4349    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4350    In the above case the values for d_nnz,o_nnz are:
4351 .vb
4352      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4353      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4354      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4355 .ve
4356    Here the space allocated is the sum of all the above values, i.e., 34, and
4357    hence the preallocation is perfect.
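
   As an illustrative sketch only (the values shown are the process-1 sizes and arrays from the
   table above; each process passes its own local values, and error checking is omitted), the
   preallocation above could be supplied as:

.vb
     Mat      A;
     PetscInt m = 3, n = 3;                            /* local rows and columns owned by proc1 */
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};  /* per-row counts listed above */

     MatCreateAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);
     /* ... MatSetValues() calls for the locally owned rows ... */
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve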
4358 4359 Level: intermediate 4360 4361 .keywords: matrix, aij, compressed row, sparse, parallel 4362 4363 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4364 MPIAIJ, MatCreateMPIAIJWithArrays() 4365 @*/ 4366 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4367 { 4368 PetscErrorCode ierr; 4369 PetscMPIInt size; 4370 4371 PetscFunctionBegin; 4372 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4373 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4374 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4375 if (size > 1) { 4376 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4377 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4378 } else { 4379 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4380 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4381 } 4382 PetscFunctionReturn(0); 4383 } 4384 4385 #undef __FUNCT__ 4386 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4387 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4388 { 4389 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4390 4391 PetscFunctionBegin; 4392 *Ad = a->A; 4393 *Ao = a->B; 4394 *colmap = a->garray; 4395 PetscFunctionReturn(0); 4396 } 4397 4398 #undef __FUNCT__ 4399 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4400 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4401 { 4402 PetscErrorCode ierr; 4403 PetscInt i; 4404 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4405 4406 PetscFunctionBegin; 4407 if (coloring->ctype == IS_COLORING_GLOBAL) { 4408 ISColoringValue *allcolors,*colors; 4409 ISColoring ocoloring; 4410 4411 /* set coloring for diagonal portion */ 4412 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4413 4414 /* set coloring for off-diagonal portion */ 4415 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4416 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4417 for (i=0; i<a->B->cmap->n; i++) { 4418 colors[i] = allcolors[a->garray[i]]; 4419 } 4420 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4421 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4422 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4423 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4424 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4425 ISColoringValue *colors; 4426 PetscInt *larray; 4427 ISColoring ocoloring; 4428 4429 /* set coloring for diagonal portion */ 4430 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4431 for (i=0; i<a->A->cmap->n; i++) { 4432 larray[i] = i + A->cmap->rstart; 4433 } 4434 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4435 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4436 for (i=0; i<a->A->cmap->n; i++) { 4437 colors[i] = coloring->colors[larray[i]]; 4438 } 4439 ierr = PetscFree(larray);CHKERRQ(ierr); 4440 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4441 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4442 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4443 4444 /* set coloring for off-diagonal portion */ 4445 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4446 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4447 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4448 for (i=0; i<a->B->cmap->n; i++) { 4449 colors[i] = coloring->colors[larray[i]]; 4450 } 4451 ierr = PetscFree(larray);CHKERRQ(ierr); 4452 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4453 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4454 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4455 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4456 PetscFunctionReturn(0); 4457 } 4458 4459 #undef __FUNCT__ 4460 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4461 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4462 { 4463 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4464 PetscErrorCode ierr; 4465 4466 PetscFunctionBegin; 4467 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4468 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4469 PetscFunctionReturn(0); 4470 } 4471 4472 #undef __FUNCT__ 4473 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4474 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4475 { 4476 PetscErrorCode ierr; 4477 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4478 PetscInt *indx; 4479 4480 PetscFunctionBegin; 4481 /* This routine will ONLY return MPIAIJ type matrix */ 4482 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4483 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4484 if (n == PETSC_DECIDE) { 4485 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4486 } 4487 /* Check sum(n) = N */ 4488 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4489 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4490 4491 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4492 rstart -= m; 4493 4494 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4495 for (i=0; i<m; i++) { 4496 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4497 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4498 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4499 } 4500 4501 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4502 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4503 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4504 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4505 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4506 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4507 PetscFunctionReturn(0); 4508 } 4509 4510 #undef __FUNCT__ 4511 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4512 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4513 { 4514 PetscErrorCode ierr; 4515 PetscInt m,N,i,rstart,nnz,Ii; 4516 PetscInt *indx; 4517 PetscScalar *values; 4518 4519 PetscFunctionBegin; 4520 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4521 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4522 for (i=0; i<m; i++) { 4523 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4524 Ii = i + rstart; 4525 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4526 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4527 } 4528 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4529 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4530 PetscFunctionReturn(0); 4531 } 4532 4533 #undef __FUNCT__ 4534 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4535 /*@ 4536 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4537 matrices from each processor 4538 4539 Collective on MPI_Comm 4540 4541 Input Parameters: 4542 + comm - the communicators the parallel matrix will live on 4543 . inmat - the input sequential matrices 4544 . n - number of local columns (or PETSC_DECIDE) 4545 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4546 4547 Output Parameter: 4548 . outmat - the parallel matrix generated 4549 4550 Level: advanced 4551 4552 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4553 4554 @*/ 4555 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4556 { 4557 PetscErrorCode ierr; 4558 4559 PetscFunctionBegin; 4560 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4561 if (scall == MAT_INITIAL_MATRIX) { 4562 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4563 } 4564 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4565 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4566 PetscFunctionReturn(0); 4567 } 4568 4569 #undef __FUNCT__ 4570 #define __FUNCT__ "MatFileSplit" 4571 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4572 { 4573 PetscErrorCode ierr; 4574 PetscMPIInt rank; 4575 PetscInt m,N,i,rstart,nnz; 4576 size_t len; 4577 const PetscInt *indx; 4578 PetscViewer out; 4579 char *name; 4580 Mat B; 4581 const PetscScalar *values; 4582 4583 PetscFunctionBegin; 4584 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4585 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4586 /* Should this be the type of the diagonal block of A? 
*/ 4587 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4588 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4589 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4590 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4591 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4592 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4593 for (i=0; i<m; i++) { 4594 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4595 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4596 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4597 } 4598 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4599 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4600 4601 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4602 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4603 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4604 sprintf(name,"%s.%d",outfile,rank); 4605 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4606 ierr = PetscFree(name);CHKERRQ(ierr); 4607 ierr = MatView(B,out);CHKERRQ(ierr); 4608 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4609 ierr = MatDestroy(&B);CHKERRQ(ierr); 4610 PetscFunctionReturn(0); 4611 } 4612 4613 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4614 #undef __FUNCT__ 4615 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4616 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4617 { 4618 PetscErrorCode ierr; 4619 Mat_Merge_SeqsToMPI *merge; 4620 PetscContainer container; 4621 4622 PetscFunctionBegin; 4623 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4624 if (container) { 4625 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4626 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4627 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4628 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4629 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4630 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4631 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4632 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4633 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4634 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4635 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4636 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4637 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4638 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4639 ierr = PetscFree(merge);CHKERRQ(ierr); 4640 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4641 } 4642 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4643 PetscFunctionReturn(0); 4644 } 4645 4646 #include <../src/mat/utils/freespace.h> 4647 #include <petscbt.h> 4648 4649 #undef __FUNCT__ 4650 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4651 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4652 { 4653 PetscErrorCode ierr; 4654 MPI_Comm comm; 4655 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4656 PetscMPIInt size,rank,taga,*len_s; 4657 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4658 PetscInt proc,m; 4659 PetscInt **buf_ri,**buf_rj; 4660 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4661 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4662 MPI_Request *s_waits,*r_waits; 4663 MPI_Status *status; 4664 MatScalar *aa=a->a; 4665 MatScalar **abuf_r,*ba_i; 4666 Mat_Merge_SeqsToMPI *merge; 4667 PetscContainer container; 4668 4669 PetscFunctionBegin; 4670 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4671 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4672 4673 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4674 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4675 4676 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4677 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4678 4679 bi = merge->bi; 4680 bj = merge->bj; 4681 buf_ri = merge->buf_ri; 4682 buf_rj = merge->buf_rj; 4683 4684 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4685 owners = merge->rowmap->range; 4686 len_s = merge->len_s; 4687 4688 /* send and recv matrix values */ 4689 /*-----------------------------*/ 4690 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4691 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4692 4693 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4694 for (proc=0,k=0; proc<size; proc++) { 4695 if (!len_s[proc]) continue; 4696 i = owners[proc]; 4697 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4698 k++; 4699 } 4700 4701 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4702 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4703 ierr = PetscFree(status);CHKERRQ(ierr); 4704 4705 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4706 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4707 4708 /* insert mat values of mpimat */ 4709 /*----------------------------*/ 4710 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4711 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4712 4713 for (k=0; k<merge->nrecv; k++) { 4714 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4715 nrows = *(buf_ri_k[k]); 4716 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4717 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4718 } 4719 4720 /* set values of ba */ 4721 m = merge->rowmap->n; 4722 for (i=0; i<m; i++) { 4723 arow = owners[rank] + i; 4724 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4725 bnzi = bi[i+1] - bi[i]; 4726 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4727 4728 /* add local non-zero vals of this proc's seqmat into ba */ 4729 anzi = ai[arow+1] - ai[arow]; 4730 aj = a->j + ai[arow]; 4731 aa = a->a + ai[arow]; 4732 nextaj = 0; 4733 for (j=0; nextaj<anzi; j++) { 4734 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4735 ba_i[j] += aa[nextaj++]; 4736 } 4737 } 4738 4739 /* add received vals into ba */ 4740 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4741 /* i-th row */ 4742 if (i == *nextrow[k]) { 4743 anzi = *(nextai[k]+1) - *nextai[k]; 4744 aj = buf_rj[k] + *(nextai[k]); 4745 aa = abuf_r[k] + *(nextai[k]); 4746 nextaj = 0; 4747 for (j=0; nextaj<anzi; j++) { 4748 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4749 ba_i[j] += aa[nextaj++]; 4750 } 4751 } 4752 nextrow[k]++; nextai[k]++; 4753 } 4754 } 4755 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4756 } 4757 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4758 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4759 4760 ierr = 
PetscFree(abuf_r[0]);CHKERRQ(ierr); 4761 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4762 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4763 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4764 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4765 PetscFunctionReturn(0); 4766 } 4767 4768 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4769 4770 #undef __FUNCT__ 4771 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4772 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4773 { 4774 PetscErrorCode ierr; 4775 Mat B_mpi; 4776 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4777 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4778 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4779 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4780 PetscInt len,proc,*dnz,*onz,bs,cbs; 4781 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4782 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4783 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4784 MPI_Status *status; 4785 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4786 PetscBT lnkbt; 4787 Mat_Merge_SeqsToMPI *merge; 4788 PetscContainer container; 4789 4790 PetscFunctionBegin; 4791 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4792 4793 /* make sure it is a PETSc comm */ 4794 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4795 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4796 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4797 4798 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4799 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4800 4801 /* determine row ownership */ 4802 /*---------------------------------------------------------*/ 4803 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4804 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4805 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4806 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4807 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4808 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4809 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4810 4811 m = merge->rowmap->n; 4812 owners = merge->rowmap->range; 4813 4814 /* determine the number of messages to send, their lengths */ 4815 /*---------------------------------------------------------*/ 4816 len_s = merge->len_s; 4817 4818 len = 0; /* length of buf_si[] */ 4819 merge->nsend = 0; 4820 for (proc=0; proc<size; proc++) { 4821 len_si[proc] = 0; 4822 if (proc == rank) { 4823 len_s[proc] = 0; 4824 } else { 4825 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4826 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4827 } 4828 if (len_s[proc]) { 4829 merge->nsend++; 4830 nrows = 0; 4831 for (i=owners[proc]; i<owners[proc+1]; i++) { 4832 if (ai[i+1] > ai[i]) nrows++; 4833 } 4834 len_si[proc] = 2*(nrows+1); 4835 len += len_si[proc]; 4836 } 4837 } 4838 4839 /* determine the number and length of messages to receive for ij-structure */ 4840 /*-------------------------------------------------------------------------*/ 4841 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4842 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4843 4844 /* post the Irecv of j-structure */ 4845 
/*-------------------------------*/ 4846 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4847 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4848 4849 /* post the Isend of j-structure */ 4850 /*--------------------------------*/ 4851 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4852 4853 for (proc=0, k=0; proc<size; proc++) { 4854 if (!len_s[proc]) continue; 4855 i = owners[proc]; 4856 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4857 k++; 4858 } 4859 4860 /* receives and sends of j-structure are complete */ 4861 /*------------------------------------------------*/ 4862 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4863 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4864 4865 /* send and recv i-structure */ 4866 /*---------------------------*/ 4867 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4868 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4869 4870 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4871 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4872 for (proc=0,k=0; proc<size; proc++) { 4873 if (!len_s[proc]) continue; 4874 /* form outgoing message for i-structure: 4875 buf_si[0]: nrows to be sent 4876 [1:nrows]: row index (global) 4877 [nrows+1:2*nrows+1]: i-structure index 4878 */ 4879 /*-------------------------------------------*/ 4880 nrows = len_si[proc]/2 - 1; 4881 buf_si_i = buf_si + nrows+1; 4882 buf_si[0] = nrows; 4883 buf_si_i[0] = 0; 4884 nrows = 0; 4885 for (i=owners[proc]; i<owners[proc+1]; i++) { 4886 anzi = ai[i+1] - ai[i]; 4887 if (anzi) { 4888 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4889 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4890 nrows++; 4891 } 4892 } 4893 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4894 k++; 4895 buf_si += len_si[proc]; 4896 } 4897 4898 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4899 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4900 4901 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4902 for (i=0; i<merge->nrecv; i++) { 4903 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4904 } 4905 4906 ierr = PetscFree(len_si);CHKERRQ(ierr); 4907 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4908 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4909 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4910 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4911 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4912 ierr = PetscFree(status);CHKERRQ(ierr); 4913 4914 /* compute a local seq matrix in each processor */ 4915 /*----------------------------------------------*/ 4916 /* allocate bi array and free space for accumulating nonzero column info */ 4917 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4918 bi[0] = 0; 4919 4920 /* create and initialize a linked list */ 4921 nlnk = N+1; 4922 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4923 4924 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4925 len = ai[owners[rank+1]] - ai[owners[rank]]; 4926 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4927 4928 
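  /*
     The loop over local rows below merges, for each row, the column indices of this process's portion
     of seqmat with the column indices received from the other processes: PetscLLAddSorted() adds them
     to the sorted linked list (removing duplicates), PetscLLClean() copies the merged row into the
     free-space chain, and MatPreallocateSet() records the dnz/onz counts used to preallocate the
     parallel matrix created afterwards.
  */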
current_space = free_space; 4929 4930 /* determine symbolic info for each local row */ 4931 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4932 4933 for (k=0; k<merge->nrecv; k++) { 4934 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4935 nrows = *buf_ri_k[k]; 4936 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4937 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4938 } 4939 4940 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4941 len = 0; 4942 for (i=0; i<m; i++) { 4943 bnzi = 0; 4944 /* add local non-zero cols of this proc's seqmat into lnk */ 4945 arow = owners[rank] + i; 4946 anzi = ai[arow+1] - ai[arow]; 4947 aj = a->j + ai[arow]; 4948 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4949 bnzi += nlnk; 4950 /* add received col data into lnk */ 4951 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4952 if (i == *nextrow[k]) { /* i-th row */ 4953 anzi = *(nextai[k]+1) - *nextai[k]; 4954 aj = buf_rj[k] + *nextai[k]; 4955 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4956 bnzi += nlnk; 4957 nextrow[k]++; nextai[k]++; 4958 } 4959 } 4960 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4961 4962 /* if free space is not available, make more free space */ 4963 if (current_space->local_remaining<bnzi) { 4964 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4965 nspacedouble++; 4966 } 4967 /* copy data into free space, then initialize lnk */ 4968 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4969 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4970 4971 current_space->array += bnzi; 4972 current_space->local_used += bnzi; 4973 current_space->local_remaining -= bnzi; 4974 4975 bi[i+1] = bi[i] + bnzi; 4976 } 4977 4978 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4979 4980 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4981 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4982 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4983 4984 /* create symbolic parallel matrix B_mpi */ 4985 /*---------------------------------------*/ 4986 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4987 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4988 if (n==PETSC_DECIDE) { 4989 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4990 } else { 4991 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4992 } 4993 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4994 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4995 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4996 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4997 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4998 4999 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5000 B_mpi->assembled = PETSC_FALSE; 5001 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5002 merge->bi = bi; 5003 merge->bj = bj; 5004 merge->buf_ri = buf_ri; 5005 merge->buf_rj = buf_rj; 5006 merge->coi = NULL; 5007 merge->coj = NULL; 5008 merge->owners_co = NULL; 5009 5010 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5011 5012 /* attach the supporting struct to B_mpi for reuse */ 5013 ierr = 
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5014 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5015 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5016 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5017 *mpimat = B_mpi; 5018 5019 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5020 PetscFunctionReturn(0); 5021 } 5022 5023 #undef __FUNCT__ 5024 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 5025 /*@C 5026 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 5027 matrices from each processor 5028 5029 Collective on MPI_Comm 5030 5031 Input Parameters: 5032 + comm - the communicators the parallel matrix will live on 5033 . seqmat - the input sequential matrices 5034 . m - number of local rows (or PETSC_DECIDE) 5035 . n - number of local columns (or PETSC_DECIDE) 5036 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5037 5038 Output Parameter: 5039 . mpimat - the parallel matrix generated 5040 5041 Level: advanced 5042 5043 Notes: 5044 The dimensions of the sequential matrix in each processor MUST be the same. 5045 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5046 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5047 @*/ 5048 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5049 { 5050 PetscErrorCode ierr; 5051 PetscMPIInt size; 5052 5053 PetscFunctionBegin; 5054 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5055 if (size == 1) { 5056 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5057 if (scall == MAT_INITIAL_MATRIX) { 5058 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5059 } else { 5060 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5061 } 5062 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5063 PetscFunctionReturn(0); 5064 } 5065 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5066 if (scall == MAT_INITIAL_MATRIX) { 5067 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5068 } 5069 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5070 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5071 PetscFunctionReturn(0); 5072 } 5073 5074 #undef __FUNCT__ 5075 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5076 /*@ 5077 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 5078 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5079 with MatGetSize() 5080 5081 Not Collective 5082 5083 Input Parameters: 5084 + A - the matrix 5085 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5086 5087 Output Parameter: 5088 . 
A_loc - the local sequential matrix generated 5089 5090 Level: developer 5091 5092 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5093 5094 @*/ 5095 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5096 { 5097 PetscErrorCode ierr; 5098 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5099 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5100 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5101 MatScalar *aa=a->a,*ba=b->a,*cam; 5102 PetscScalar *ca; 5103 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5104 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5105 PetscBool match; 5106 5107 PetscFunctionBegin; 5108 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5109 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5110 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5111 if (scall == MAT_INITIAL_MATRIX) { 5112 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5113 ci[0] = 0; 5114 for (i=0; i<am; i++) { 5115 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5116 } 5117 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5118 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5119 k = 0; 5120 for (i=0; i<am; i++) { 5121 ncols_o = bi[i+1] - bi[i]; 5122 ncols_d = ai[i+1] - ai[i]; 5123 /* off-diagonal portion of A */ 5124 for (jo=0; jo<ncols_o; jo++) { 5125 col = cmap[*bj]; 5126 if (col >= cstart) break; 5127 cj[k] = col; bj++; 5128 ca[k++] = *ba++; 5129 } 5130 /* diagonal portion of A */ 5131 for (j=0; j<ncols_d; j++) { 5132 cj[k] = cstart + *aj++; 5133 ca[k++] = *aa++; 5134 } 5135 /* off-diagonal portion of A */ 5136 for (j=jo; j<ncols_o; j++) { 5137 cj[k] = cmap[*bj++]; 5138 ca[k++] = *ba++; 5139 } 5140 } 5141 /* put together the new matrix */ 5142 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5143 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5144 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5145 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5146 mat->free_a = PETSC_TRUE; 5147 mat->free_ij = PETSC_TRUE; 5148 mat->nonew = 0; 5149 } else if (scall == MAT_REUSE_MATRIX) { 5150 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5151 ci = mat->i; cj = mat->j; cam = mat->a; 5152 for (i=0; i<am; i++) { 5153 /* off-diagonal portion of A */ 5154 ncols_o = bi[i+1] - bi[i]; 5155 for (jo=0; jo<ncols_o; jo++) { 5156 col = cmap[*bj]; 5157 if (col >= cstart) break; 5158 *cam++ = *ba++; bj++; 5159 } 5160 /* diagonal portion of A */ 5161 ncols_d = ai[i+1] - ai[i]; 5162 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5163 /* off-diagonal portion of A */ 5164 for (j=jo; j<ncols_o; j++) { 5165 *cam++ = *ba++; bj++; 5166 } 5167 } 5168 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5169 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5170 PetscFunctionReturn(0); 5171 } 5172 5173 #undef __FUNCT__ 5174 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5175 /*@C 5176 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5177 5178 Not Collective 5179 5180 Input Parameters: 5181 + A - the matrix 5182 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5183 - row, col - index sets of rows and columns to extract (or NULL) 5184 5185 Output Parameter: 5186 . 
A_loc - the local sequential matrix generated 5187 5188 Level: developer 5189 5190 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5191 5192 @*/ 5193 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5194 { 5195 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5196 PetscErrorCode ierr; 5197 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5198 IS isrowa,iscola; 5199 Mat *aloc; 5200 PetscBool match; 5201 5202 PetscFunctionBegin; 5203 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5204 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5205 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5206 if (!row) { 5207 start = A->rmap->rstart; end = A->rmap->rend; 5208 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5209 } else { 5210 isrowa = *row; 5211 } 5212 if (!col) { 5213 start = A->cmap->rstart; 5214 cmap = a->garray; 5215 nzA = a->A->cmap->n; 5216 nzB = a->B->cmap->n; 5217 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5218 ncols = 0; 5219 for (i=0; i<nzB; i++) { 5220 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5221 else break; 5222 } 5223 imark = i; 5224 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5225 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5226 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5227 } else { 5228 iscola = *col; 5229 } 5230 if (scall != MAT_INITIAL_MATRIX) { 5231 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5232 aloc[0] = *A_loc; 5233 } 5234 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5235 *A_loc = aloc[0]; 5236 ierr = PetscFree(aloc);CHKERRQ(ierr); 5237 if (!row) { 5238 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5239 } 5240 if (!col) { 5241 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5242 } 5243 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5244 PetscFunctionReturn(0); 5245 } 5246 5247 #undef __FUNCT__ 5248 #define __FUNCT__ "MatGetBrowsOfAcols" 5249 /*@C 5250 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5251 5252 Collective on Mat 5253 5254 Input Parameters: 5255 + A,B - the matrices in mpiaij format 5256 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5257 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5258 5259 Output Parameter: 5260 + rowb, colb - index sets of rows and columns of B to extract 5261 - B_seq - the sequential matrix generated 5262 5263 Level: developer 5264 5265 @*/ 5266 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5267 { 5268 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5269 PetscErrorCode ierr; 5270 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5271 IS isrowb,iscolb; 5272 Mat *bseq=NULL; 5273 5274 PetscFunctionBegin; 5275 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5276 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5277 } 5278 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5279 5280 if (scall == MAT_INITIAL_MATRIX) { 5281 start = A->cmap->rstart; 5282 cmap = a->garray; 5283 nzA = a->A->cmap->n; 5284 nzB = a->B->cmap->n; 5285 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5286 ncols = 0; 5287 for (i=0; i<nzB; i++) { /* row < local row index */ 5288 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5289 else break; 5290 } 5291 imark = i; 5292 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5293 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5294 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5295 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5296 } else { 5297 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5298 isrowb = *rowb; iscolb = *colb; 5299 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5300 bseq[0] = *B_seq; 5301 } 5302 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5303 *B_seq = bseq[0]; 5304 ierr = PetscFree(bseq);CHKERRQ(ierr); 5305 if (!rowb) { 5306 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5307 } else { 5308 *rowb = isrowb; 5309 } 5310 if (!colb) { 5311 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5312 } else { 5313 *colb = iscolb; 5314 } 5315 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5316 PetscFunctionReturn(0); 5317 } 5318 5319 #undef __FUNCT__ 5320 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5321 /* 5322 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5323 of the OFF-DIAGONAL portion of local A 5324 5325 Collective on Mat 5326 5327 Input Parameters: 5328 + A,B - the matrices in mpiaij format 5329 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5330 5331 Output Parameter: 5332 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5333 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5334 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5335 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5336 5337 Level: developer 5338 5339 */ 5340 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5341 { 5342 VecScatter_MPI_General *gen_to,*gen_from; 5343 PetscErrorCode ierr; 5344 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5345 Mat_SeqAIJ *b_oth; 5346 VecScatter ctx =a->Mvctx; 5347 MPI_Comm comm; 5348 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5349 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5350 PetscScalar *rvalues,*svalues; 5351 MatScalar *b_otha,*bufa,*bufA; 5352 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5353 MPI_Request *rwaits = NULL,*swaits = NULL; 5354 MPI_Status *sstatus,rstatus; 5355 PetscMPIInt jj; 5356 PetscInt *cols,sbs,rbs; 5357 PetscScalar *vals; 5358 5359 PetscFunctionBegin; 5360 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5361 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5362 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5363 } 5364 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5365 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5366 5367 gen_to = (VecScatter_MPI_General*)ctx->todata; 5368 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5369 rvalues = gen_from->values; /* holds the length of receiving row */ 5370 svalues = gen_to->values; /* holds the length of sending row */ 5371 nrecvs = gen_from->n; 5372 nsends = gen_to->n; 5373 5374 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5375 srow = gen_to->indices; /* local row index to be sent */ 5376 sstarts = gen_to->starts; 5377 sprocs = gen_to->procs; 5378 sstatus = gen_to->sstatus; 5379 sbs = gen_to->bs; 5380 rstarts = gen_from->starts; 5381 rprocs = gen_from->procs; 5382 rbs = gen_from->bs; 5383 5384 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5385 if (scall == MAT_INITIAL_MATRIX) { 5386 /* i-array */ 5387 /*---------*/ 5388 /* post receives */ 5389 for (i=0; i<nrecvs; i++) { 5390 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5391 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5392 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5393 } 5394 5395 /* pack the outgoing message */ 5396 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5397 5398 sstartsj[0] = 0; 5399 rstartsj[0] = 0; 5400 len = 0; /* total length of j or a array to be sent */ 5401 k = 0; 5402 for (i=0; i<nsends; i++) { 5403 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5404 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5405 for (j=0; j<nrows; j++) { 5406 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5407 for (l=0; l<sbs; l++) { 5408 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5409 5410 rowlen[j*sbs+l] = ncols; 5411 5412 len += ncols; 5413 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5414 } 5415 k++; 5416 } 5417 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5418 5419 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in 
bufj and bufa */ 5420 } 5421 /* recvs and sends of i-array are completed */ 5422 i = nrecvs; 5423 while (i--) { 5424 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5425 } 5426 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5427 5428 /* allocate buffers for sending j and a arrays */ 5429 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5430 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5431 5432 /* create i-array of B_oth */ 5433 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5434 5435 b_othi[0] = 0; 5436 len = 0; /* total length of j or a array to be received */ 5437 k = 0; 5438 for (i=0; i<nrecvs; i++) { 5439 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5440 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5441 for (j=0; j<nrows; j++) { 5442 b_othi[k+1] = b_othi[k] + rowlen[j]; 5443 len += rowlen[j]; k++; 5444 } 5445 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5446 } 5447 5448 /* allocate space for j and a arrrays of B_oth */ 5449 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5450 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5451 5452 /* j-array */ 5453 /*---------*/ 5454 /* post receives of j-array */ 5455 for (i=0; i<nrecvs; i++) { 5456 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5457 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5458 } 5459 5460 /* pack the outgoing message j-array */ 5461 k = 0; 5462 for (i=0; i<nsends; i++) { 5463 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5464 bufJ = bufj+sstartsj[i]; 5465 for (j=0; j<nrows; j++) { 5466 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5467 for (ll=0; ll<sbs; ll++) { 5468 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5469 for (l=0; l<ncols; l++) { 5470 *bufJ++ = cols[l]; 5471 } 5472 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5473 } 5474 } 5475 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5476 } 5477 5478 /* recvs and sends of j-array are completed */ 5479 i = nrecvs; 5480 while (i--) { 5481 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5482 } 5483 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5484 } else if (scall == MAT_REUSE_MATRIX) { 5485 sstartsj = *startsj_s; 5486 rstartsj = *startsj_r; 5487 bufa = *bufa_ptr; 5488 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5489 b_otha = b_oth->a; 5490 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5491 5492 /* a-array */ 5493 /*---------*/ 5494 /* post receives of a-array */ 5495 for (i=0; i<nrecvs; i++) { 5496 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5497 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5498 } 5499 5500 /* pack the outgoing message a-array */ 5501 k = 0; 5502 for (i=0; i<nsends; i++) { 5503 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5504 bufA = bufa+sstartsj[i]; 5505 for (j=0; j<nrows; j++) { 5506 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5507 for (ll=0; ll<sbs; ll++) { 5508 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5509 for (l=0; l<ncols; l++) { 5510 *bufA++ = vals[l]; 5511 } 5512 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5513 } 5514 } 5515 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5516 } 5517 /* recvs and sends of a-array are completed */ 5518 i = nrecvs; 5519 while (i--) { 5520 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5521 } 5522 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5523 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5524 5525 if (scall == MAT_INITIAL_MATRIX) { 5526 /* put together the new matrix */ 5527 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5528 5529 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5530 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5531 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5532 b_oth->free_a = PETSC_TRUE; 5533 b_oth->free_ij = PETSC_TRUE; 5534 b_oth->nonew = 0; 5535 5536 ierr = PetscFree(bufj);CHKERRQ(ierr); 5537 if (!startsj_s || !bufa_ptr) { 5538 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5539 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5540 } else { 5541 *startsj_s = sstartsj; 5542 *startsj_r = rstartsj; 5543 *bufa_ptr = bufa; 5544 } 5545 } 5546 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5547 PetscFunctionReturn(0); 5548 } 5549 5550 #undef __FUNCT__ 5551 #define __FUNCT__ "MatGetCommunicationStructs" 5552 /*@C 5553 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5554 5555 Not Collective 5556 5557 Input Parameters: 5558 . A - The matrix in mpiaij format 5559 5560 Output Parameter: 5561 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5562 . 
colmap - A map from global column index to local index into lvec 5563 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5564 5565 Level: developer 5566 5567 @*/ 5568 #if defined(PETSC_USE_CTABLE) 5569 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5570 #else 5571 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5572 #endif 5573 { 5574 Mat_MPIAIJ *a; 5575 5576 PetscFunctionBegin; 5577 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5578 PetscValidPointer(lvec, 2); 5579 PetscValidPointer(colmap, 3); 5580 PetscValidPointer(multScatter, 4); 5581 a = (Mat_MPIAIJ*) A->data; 5582 if (lvec) *lvec = a->lvec; 5583 if (colmap) *colmap = a->colmap; 5584 if (multScatter) *multScatter = a->Mvctx; 5585 PetscFunctionReturn(0); 5586 } 5587 5588 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5589 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5590 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5591 5592 #undef __FUNCT__ 5593 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5594 /* 5595 Computes (B'*A')' since computing B*A directly is untenable 5596 5597 n p p 5598 ( ) ( ) ( ) 5599 m ( A ) * n ( B ) = m ( C ) 5600 ( ) ( ) ( ) 5601 5602 */ 5603 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5604 { 5605 PetscErrorCode ierr; 5606 Mat At,Bt,Ct; 5607 5608 PetscFunctionBegin; 5609 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5610 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5611 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5612 ierr = MatDestroy(&At);CHKERRQ(ierr); 5613 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5614 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5615 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5616 PetscFunctionReturn(0); 5617 } 5618 5619 #undef __FUNCT__ 5620 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5621 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5622 { 5623 PetscErrorCode ierr; 5624 PetscInt m=A->rmap->n,n=B->cmap->n; 5625 Mat Cmat; 5626 5627 PetscFunctionBegin; 5628 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5629 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5630 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5631 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5632 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5633 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5634 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5635 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5636 5637 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5638 5639 *C = Cmat; 5640 PetscFunctionReturn(0); 5641 } 5642 5643 /* ----------------------------------------------------------------*/ 5644 #undef __FUNCT__ 5645 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5646 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5647 { 5648 PetscErrorCode ierr; 5649 5650 PetscFunctionBegin; 5651 if (scall == MAT_INITIAL_MATRIX) { 5652 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5653 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5654 ierr = 
PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5655 } 5656 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5657 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5658 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5659 PetscFunctionReturn(0); 5660 } 5661 5662 #if defined(PETSC_HAVE_MUMPS) 5663 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5664 #endif 5665 #if defined(PETSC_HAVE_PASTIX) 5666 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5667 #endif 5668 #if defined(PETSC_HAVE_SUPERLU_DIST) 5669 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5670 #endif 5671 #if defined(PETSC_HAVE_CLIQUE) 5672 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5673 #endif 5674 5675 /*MC 5676 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5677 5678 Options Database Keys: 5679 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5680 5681 Level: beginner 5682 5683 .seealso: MatCreateAIJ() 5684 M*/ 5685 5686 #undef __FUNCT__ 5687 #define __FUNCT__ "MatCreate_MPIAIJ" 5688 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5689 { 5690 Mat_MPIAIJ *b; 5691 PetscErrorCode ierr; 5692 PetscMPIInt size; 5693 5694 PetscFunctionBegin; 5695 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5696 5697 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5698 B->data = (void*)b; 5699 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5700 B->assembled = PETSC_FALSE; 5701 B->insertmode = NOT_SET_VALUES; 5702 b->size = size; 5703 5704 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5705 5706 /* build cache for off array entries formed */ 5707 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5708 5709 b->donotstash = PETSC_FALSE; 5710 b->colmap = 0; 5711 b->garray = 0; 5712 b->roworiented = PETSC_TRUE; 5713 5714 /* stuff used for matrix vector multiply */ 5715 b->lvec = NULL; 5716 b->Mvctx = NULL; 5717 5718 /* stuff for MatGetRow() */ 5719 b->rowindices = 0; 5720 b->rowvalues = 0; 5721 b->getrowactive = PETSC_FALSE; 5722 5723 /* flexible pointer used in CUSP/CUSPARSE classes */ 5724 b->spptr = NULL; 5725 5726 #if defined(PETSC_HAVE_MUMPS) 5727 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5728 #endif 5729 #if defined(PETSC_HAVE_PASTIX) 5730 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5731 #endif 5732 #if defined(PETSC_HAVE_SUPERLU_DIST) 5733 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5734 #endif 5735 #if defined(PETSC_HAVE_CLIQUE) 5736 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5737 #endif 5738 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5739 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5740 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5741 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5742 
ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5743 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5744 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5745 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5746 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5747 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5748 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5749 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5750 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5751 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5752 PetscFunctionReturn(0); 5753 } 5754 5755 #undef __FUNCT__ 5756 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5757 /*@ 5758 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5759 and "off-diagonal" part of the matrix in CSR format. 5760 5761 Collective on MPI_Comm 5762 5763 Input Parameters: 5764 + comm - MPI communicator 5765 . m - number of local rows (Cannot be PETSC_DECIDE) 5766 . n - This value should be the same as the local size used in creating the 5767 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5768 calculated if N is given) For square matrices n is almost always m. 5769 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5770 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5771 . i - row indices for "diagonal" portion of matrix 5772 . j - column indices 5773 . a - matrix values 5774 . oi - row indices for "off-diagonal" portion of matrix 5775 . oj - column indices 5776 - oa - matrix values 5777 5778 Output Parameter: 5779 . mat - the matrix 5780 5781 Level: advanced 5782 5783 Notes: 5784 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5785 must free the arrays once the matrix has been destroyed and not before. 5786 5787 The i and j indices are 0 based 5788 5789 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5790 5791 This sets local rows and cannot be used to set off-processor values. 5792 5793 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5794 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5795 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5796 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5797 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5798 communication if it is known that only local entries will be set. 
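   For reference, the recommended MatSetValues()-based assembly mentioned above looks roughly as
   follows (a sketch only; comm, m, n, M, N, d_nnz, o_nnz, ncols, cols, and vals are placeholders for
   application data, and rstart, rend are obtained from MatGetOwnershipRange()):
.vb
     Mat      A;
     PetscInt i,rstart,rend;

     MatCreateAIJ(comm,m,n,M,N,0,d_nnz,0,o_nnz,&A);
     MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE); /* only if all entries set are locally owned */
     MatGetOwnershipRange(A,&rstart,&rend);
     for (i=rstart; i<rend; i++) {
       /* compute the column indices cols[] and values vals[] of row i, then */
       MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);
     }
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve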
5799 5800 .keywords: matrix, aij, compressed row, sparse, parallel 5801 5802 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5803 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5804 @*/ 5805 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5806 { 5807 PetscErrorCode ierr; 5808 Mat_MPIAIJ *maij; 5809 5810 PetscFunctionBegin; 5811 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5812 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5813 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5814 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5815 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5816 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5817 maij = (Mat_MPIAIJ*) (*mat)->data; 5818 5819 (*mat)->preallocated = PETSC_TRUE; 5820 5821 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5822 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5823 5824 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5825 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5826 5827 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5828 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5829 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5830 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5831 5832 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5833 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5834 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5835 PetscFunctionReturn(0); 5836 } 5837 5838 /* 5839 Special version for direct calls from Fortran 5840 */ 5841 #include <petsc-private/fortranimpl.h> 5842 5843 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5844 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5845 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5846 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5847 #endif 5848 5849 /* Change these macros so can be used in void function */ 5850 #undef CHKERRQ 5851 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5852 #undef SETERRQ2 5853 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5854 #undef SETERRQ3 5855 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5856 #undef SETERRQ 5857 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5858 5859 #undef __FUNCT__ 5860 #define __FUNCT__ "matsetvaluesmpiaij_" 5861 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5862 { 5863 Mat mat = *mmat; 5864 PetscInt m = *mm, n = *mn; 5865 InsertMode addv = *maddv; 5866 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5867 PetscScalar value; 5868 PetscErrorCode ierr; 5869 5870 MatCheckPreallocated(mat,1); 5871 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5872 5873 #if defined(PETSC_USE_DEBUG) 5874 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5875 #endif 5876 { 5877 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5878 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5879 PetscBool roworiented = aij->roworiented; 5880 5881 /* Some Variables required in the macro */ 5882 Mat A = aij->A; 5883 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5884 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5885 MatScalar *aa = a->a; 5886 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5887 Mat B = aij->B; 5888 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5889 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5890 MatScalar *ba = b->a; 5891 5892 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5893 PetscInt nonew = a->nonew; 5894 MatScalar *ap1,*ap2; 5895 5896 PetscFunctionBegin; 5897 for (i=0; i<m; i++) { 5898 if (im[i] < 0) continue; 5899 #if defined(PETSC_USE_DEBUG) 5900 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5901 #endif 5902 if (im[i] >= rstart && im[i] < rend) { 5903 row = im[i] - rstart; 5904 lastcol1 = -1; 5905 rp1 = aj + ai[row]; 5906 ap1 = aa + ai[row]; 5907 rmax1 = aimax[row]; 5908 nrow1 = ailen[row]; 5909 low1 = 0; 5910 high1 = nrow1; 5911 lastcol2 = -1; 5912 rp2 = bj + bi[row]; 5913 ap2 = ba + bi[row]; 5914 rmax2 = bimax[row]; 5915 nrow2 = bilen[row]; 5916 low2 = 0; 5917 high2 = nrow2; 5918 5919 for (j=0; j<n; j++) { 5920 if (roworiented) value = v[i*n+j]; 5921 else value = v[i+j*m]; 5922 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5923 if (in[j] >= cstart && in[j] < cend) { 5924 col = in[j] - cstart; 5925 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5926 } else if (in[j] < 0) continue; 5927 #if defined(PETSC_USE_DEBUG) 5928 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5929 #endif 5930 else { 5931 if (mat->was_assembled) { 5932 if (!aij->colmap) { 5933 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5934 } 5935 #if defined(PETSC_USE_CTABLE) 5936 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5937 col--; 5938 #else 5939 col = aij->colmap[in[j]] - 1; 5940 #endif 5941 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5942 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5943 col = in[j]; 5944 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5945 B = aij->B; 5946 b = (Mat_SeqAIJ*)B->data; 5947 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5948 rp2 = bj + bi[row]; 5949 ap2 = ba + bi[row]; 5950 rmax2 = bimax[row]; 5951 nrow2 = bilen[row]; 5952 low2 = 0; 5953 high2 = nrow2; 5954 bm = aij->B->rmap->n; 5955 ba = b->a; 5956 } 5957 } else col = in[j]; 5958 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5959 } 5960 } 5961 } else if (!aij->donotstash) { 5962 if (roworiented) { 5963 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5964 } else { 5965 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5966 } 5967 } 5968 } 5969 } 5970 PetscFunctionReturnVoid(); 5971 } 5972 5973