#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
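
/*
   Usage sketch for the MATAIJ manual page above (illustrative only, not part
   of the library source): create a matrix of type "aij" and call both
   preallocation routines, as the page recommends, so the same code runs on
   one process or many.  The global size n and the per-row nonzero estimates
   (5 in the diagonal block, 2 off it) are assumptions made up for this example.

     Mat            A;
     PetscErrorCode ierr;
     PetscInt       n = 100;                       // assumed problem size

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);              // used when size == 1
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr); // used when size > 1
*/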

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = PETSC_NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
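
/*
   Usage sketch (hedged, illustrative): how the column-norm routine above is
   reached through the public interface.  The caller provides an array with
   one entry per global column; N is obtained from MatGetSize().

     PetscReal      *norms;
     PetscInt       N;
     PetscErrorCode ierr;

     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_INFINITY,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/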

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
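
/*
   The diagonal/off-diagonal classification used by MatDistribute_MPIAIJ()
   above, restated in isolation (a sketch with an illustrative name, not a
   library routine): given this process's ownership range [rstart,rend), a
   column index belongs to the diagonal block A exactly when it falls inside
   the range; otherwise it is counted in olens[] for the off-diagonal block B.

     static PetscBool ColInDiagonalBlock(PetscInt col,PetscInt rstart,PetscInt rend)
     {
       return (col >= rstart && col < rend) ? PETSC_TRUE : PETSC_FALSE;
     }
*/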

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2)  low2 = 0;     \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
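
/*
   What the two macros above implement, in plain form (a hedged sketch with
   illustrative names; the real macros also reallocate the row through
   MatSeqXAIJReallocateAIJ() and honor the nonew/ignorezeroentries options):
   the column indices of a row are kept sorted, so a set narrows [low,high)
   by bisection until the window is small, finishes with a short linear scan,
   and on a miss shifts the tail of the row up one slot to make room.

     static void InsertSortedEntry(PetscInt *cols,PetscScalar *vals,PetscInt *n,
                                   PetscInt col,PetscScalar value)
     {
       PetscInt low = 0,high = *n,t,i;
       while (high - low > 5) {              // bisection phase
         t = (low + high)/2;
         if (cols[t] > col) high = t;
         else               low  = t;
       }
       for (i=low; i<high; i++) {            // short linear scan
         if (cols[i] > col) break;
         if (cols[i] == col) { vals[i] += value; return; }
       }
       for (t=*n; t>i; t--) {                // shift tail up one slot (assumes spare capacity)
         cols[t] = cols[t-1];
         vals[t] = vals[t-1];
       }
       cols[i] = col; vals[i] = value; (*n)++;
     }
*/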

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
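
/*
   A worked example of the row layout MatSetValuesRow_MPIAIJ() relies on
   (the numbers are invented for illustration): suppose this process owns
   columns [10,20) and the full row passed in v[] has global columns
   {2,7,11,15,33}.  Then l = 2, so v[0..1] (columns 2 and 7) land in the left
   part of B, v[2..3] (columns 11 and 15) land in the diagonal block A, and
   v[4] (column 33) lands in the right part of B.  The three PetscMemcpy()
   calls above copy exactly those pieces.
*/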

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
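
/*
   Usage sketch (hedged; the indices are illustrative): how the two routines
   above are reached from user code.  Rows owned by this process go straight
   into the A/B blocks; rows owned elsewhere are stashed by
   MatSetValues_MPIAIJ() and only reach their owner during the assembly pair
   implemented below.

     PetscInt    row = 0,col = 0;           // any global indices, owned or not
     PetscScalar v   = 1.0;

     ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/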

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
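
/*
   The insert-mode consistency check above in isolation (a sketch; the enum
   values INSERT_VALUES and ADD_VALUES are distinct small integers, so a
   bitwise OR across all ranks reveals a mix): if some ranks inserted while
   others added, the reduced value carries both bits and equals
   (ADD_VALUES|INSERT_VALUES), which is exactly the condition tested above.
   A rank that set nothing contributes 0 and does not disturb the result.

     InsertMode mine = mat->insertmode,all;
     ierr = MPI_Allreduce(&mine,&all,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
     if (all == (ADD_VALUES|INSERT_VALUES)) {
       // error: some processors inserted while others added
     }
*/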

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry, which
     would allocate new memory, triggering lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
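
/*
   Usage sketch for the routine above (hedged; the row numbers are
   illustrative): zero two rows of the parallel matrix, keep a unit diagonal,
   and fix the right-hand side b so that x retains its prescribed values.
   The rows may be owned by other processes; they are forwarded with the
   message passing seen above.

     PetscInt rows[] = {0,5};
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/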

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
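
/*
   What the multiply routines above compute, written out: with the local rows
   split into the diagonal block A_d and the off-diagonal block B, and x_g the
   ghost values gathered into a->lvec by the scatter,

       y = A_d * x_local + B * x_g

   The scatter that fills x_g is started before the local product and finished
   after it, so the communication overlaps the A_d multiply.
*/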

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
\n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1494 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1495 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1496 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1497 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1500 PetscInt inodecount,inodelimit,*inodes; 1501 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1502 if (inodes) { 1503 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1504 } else { 1505 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1506 } 1507 PetscFunctionReturn(0); 1508 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1509 PetscFunctionReturn(0); 1510 } 1511 } else if (isbinary) { 1512 if (size == 1) { 1513 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1514 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1515 } else { 1516 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1517 } 1518 PetscFunctionReturn(0); 1519 } else if (isdraw) { 1520 PetscDraw draw; 1521 PetscBool isnull; 1522 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1523 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1524 } 1525 1526 if (size == 1) { 1527 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1528 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1529 } else { 1530 /* assemble the entire matrix onto first processor. 
*/ 1531 Mat A; 1532 Mat_SeqAIJ *Aloc; 1533 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1534 MatScalar *a; 1535 1536 if (mat->rmap->N > 1024) { 1537 PetscBool flg = PETSC_FALSE; 1538 1539 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1540 if (!flg) { 1541 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1542 } 1543 } 1544 1545 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1546 if (!rank) { 1547 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1548 } else { 1549 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1550 } 1551 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1552 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1553 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1554 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1555 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1556 1557 /* copy over the A part */ 1558 Aloc = (Mat_SeqAIJ*)aij->A->data; 1559 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1560 row = mat->rmap->rstart; 1561 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1562 for (i=0; i<m; i++) { 1563 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1564 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1565 } 1566 aj = Aloc->j; 1567 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1568 1569 /* copy over the B part */ 1570 Aloc = (Mat_SeqAIJ*)aij->B->data; 1571 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1572 row = mat->rmap->rstart; 1573 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1574 ct = cols; 1575 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1576 for (i=0; i<m; i++) { 1577 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1578 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1579 } 1580 ierr = PetscFree(ct);CHKERRQ(ierr); 1581 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1582 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1583 /* 1584 Everyone has to call to draw the matrix since the graphics waits are 1585 synchronized across all processors that share the PetscDraw object 1586 */ 1587 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1588 if (!rank) { 1589 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1590 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1591 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1592 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1593 } 1594 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1595 ierr = MatDestroy(&A);CHKERRQ(ierr); 1596 } 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatView_MPIAIJ" 1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1603 { 1604 PetscErrorCode ierr; 1605 PetscBool iascii,isdraw,issocket,isbinary; 1606 1607 PetscFunctionBegin; 1608 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1609 ierr = 
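/*
   This routine dispatches on the viewer type only; the actual output is
   produced by MatView_MPIAIJ_ASCIIorDraworSocket() above or by
   MatView_MPIAIJ_Binary(). A minimal sketch of the usual user-level entry
   points (viewer choices illustrative):

      ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
      ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
      ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   or the runtime options -mat_view and -mat_view_draw.
*/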
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1610 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1611 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1612 if (iascii || isdraw || isbinary || issocket) { 1613 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1614 } else { 1615 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1616 } 1617 PetscFunctionReturn(0); 1618 } 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "MatSOR_MPIAIJ" 1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1623 { 1624 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1625 PetscErrorCode ierr; 1626 Vec bb1 = 0; 1627 PetscBool hasop; 1628 1629 PetscFunctionBegin; 1630 if (flag == SOR_APPLY_UPPER) { 1631 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1636 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1637 } 1638 1639 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1640 if (flag & SOR_ZERO_INITIAL_GUESS) { 1641 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1642 its--; 1643 } 1644 1645 while (its--) { 1646 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1648 1649 /* update rhs: bb1 = bb - B*x */ 1650 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1651 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1652 1653 /* local sweep */ 1654 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1655 } 1656 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1657 if (flag & SOR_ZERO_INITIAL_GUESS) { 1658 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1659 its--; 1660 } 1661 while (its--) { 1662 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1664 1665 /* update rhs: bb1 = bb - B*x */ 1666 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1667 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1668 1669 /* local sweep */ 1670 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1671 } 1672 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1673 if (flag & SOR_ZERO_INITIAL_GUESS) { 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1675 its--; 1676 } 1677 while (its--) { 1678 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1679 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1680 1681 /* update rhs: bb1 = bb - B*x */ 1682 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1683 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1684 1685 /* local sweep */ 1686 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1687 } 1688 } else if (flag & SOR_EISENSTAT) { 1689 Vec xx1; 1690 1691 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1692 ierr = 
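/*
   This SOR_EISENSTAT branch applies Eisenstat's trick: the off-diagonal part
   B is applied only once per iteration, roughly halving the work of a full
   symmetric sweep. The steps below are (1) a local backward sweep from a zero
   initial guess into xx, (2) scatter xx into mat->lvec, (3) form
   bb1 = bb + ((omega-2)/omega)*D*xx + B*lvec using the cached diagonal
   mat->diag, (4) a local forward sweep into xx1, and (5) xx += xx1. Reached,
   e.g., with a sketch like:

      ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
      ierr = PCSORSetSymmetric(pc,SOR_EISENSTAT);CHKERRQ(ierr);
*/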
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1693 1694 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1695 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1696 if (!mat->diag) { 1697 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1698 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1699 } 1700 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1701 if (hasop) { 1702 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1703 } else { 1704 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1705 } 1706 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1707 1708 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1709 1710 /* local sweep */ 1711 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1712 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1713 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1714 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1715 1716 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatPermute_MPIAIJ" 1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1723 { 1724 Mat aA,aB,Aperm; 1725 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1726 PetscScalar *aa,*ba; 1727 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1728 PetscSF rowsf,sf; 1729 IS parcolp = PETSC_NULL; 1730 PetscBool done; 1731 PetscErrorCode ierr; 1732 1733 PetscFunctionBegin; 1734 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1735 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1736 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1737 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1738 1739 /* Invert row permutation to find out where my rows should go */ 1740 ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr); 1741 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1742 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1743 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1744 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1745 1746 /* Invert column permutation to find out where my columns should go */ 1747 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1748 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1749 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1750 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1751 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1752 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1753 1754 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1755 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1756 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1757 1758 /* Find out where my gcols should go */ 1759 ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr); 1760 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1761 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1762 ierr = 
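/*
   Idiom used throughout this routine: a permutation stored as "wanted" global
   indices is inverted with a PetscSF by reducing the identity map from the
   leaves back to the roots, so that rdest[i]/cdest[i] end up holding the
   destination of each locally owned row/column. A sketch of the pattern
   (names as in the code above):

      ierr = PetscSFSetGraphLayout(sf,A->cmap,n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
      for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
      ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);

   The broadcast just below is the converse: it pulls the new global column
   numbers for the ghost columns gcols into gcdest.
*/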
PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1763 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1764 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1765 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1766 1767 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1768 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1769 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1770 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1771 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1772 for (i=0; i<m; i++) { 1773 PetscInt row = rdest[i],rowner; 1774 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1775 for (j=ai[i]; j<ai[i+1]; j++) { 1776 PetscInt cowner,col = cdest[aj[j]]; 1777 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1778 if (rowner == cowner) dnnz[i]++; 1779 else onnz[i]++; 1780 } 1781 for (j=bi[i]; j<bi[i+1]; j++) { 1782 PetscInt cowner,col = gcdest[bj[j]]; 1783 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1784 if (rowner == cowner) dnnz[i]++; 1785 else onnz[i]++; 1786 } 1787 } 1788 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1789 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1790 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1791 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1792 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1793 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1794 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1795 1796 ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1797 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1798 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1799 for (i=0; i<m; i++) { 1800 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1801 PetscInt rowlen; 1802 rowlen = ai[i+1] - ai[i]; 1803 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1804 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1805 rowlen = bi[i+1] - bi[i]; 1806 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1807 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1808 } 1809 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1810 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1811 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1812 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1813 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1814 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1815 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1816 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1817 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1818 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1819 *B = Aperm; 1820 PetscFunctionReturn(0); 1821 } 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1825 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1826 { 1827 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1828 Mat A = mat->A,B = mat->B; 1829 PetscErrorCode ierr; 1830 PetscReal 
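/*
   The five local quantities reduced below are nz_used, nz_allocated,
   nz_unneeded, memory, and mallocs, summed over the diagonal block A and the
   off-diagonal block B. A minimal user-level sketch querying them:

      MatInfo info;
      ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/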
isend[5],irecv[5]; 1831 1832 PetscFunctionBegin; 1833 info->block_size = 1.0; 1834 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1835 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1836 isend[3] = info->memory; isend[4] = info->mallocs; 1837 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1838 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1839 isend[3] += info->memory; isend[4] += info->mallocs; 1840 if (flag == MAT_LOCAL) { 1841 info->nz_used = isend[0]; 1842 info->nz_allocated = isend[1]; 1843 info->nz_unneeded = isend[2]; 1844 info->memory = isend[3]; 1845 info->mallocs = isend[4]; 1846 } else if (flag == MAT_GLOBAL_MAX) { 1847 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1848 info->nz_used = irecv[0]; 1849 info->nz_allocated = irecv[1]; 1850 info->nz_unneeded = irecv[2]; 1851 info->memory = irecv[3]; 1852 info->mallocs = irecv[4]; 1853 } else if (flag == MAT_GLOBAL_SUM) { 1854 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1855 info->nz_used = irecv[0]; 1856 info->nz_allocated = irecv[1]; 1857 info->nz_unneeded = irecv[2]; 1858 info->memory = irecv[3]; 1859 info->mallocs = irecv[4]; 1860 } 1861 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1862 info->fill_ratio_needed = 0; 1863 info->factor_mallocs = 0; 1864 PetscFunctionReturn(0); 1865 } 1866 1867 #undef __FUNCT__ 1868 #define __FUNCT__ "MatSetOption_MPIAIJ" 1869 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1870 { 1871 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1872 PetscErrorCode ierr; 1873 1874 PetscFunctionBegin; 1875 switch (op) { 1876 case MAT_NEW_NONZERO_LOCATIONS: 1877 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1878 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1879 case MAT_KEEP_NONZERO_PATTERN: 1880 case MAT_NEW_NONZERO_LOCATION_ERR: 1881 case MAT_USE_INODES: 1882 case MAT_IGNORE_ZERO_ENTRIES: 1883 MatCheckPreallocated(A,1); 1884 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1885 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1886 break; 1887 case MAT_ROW_ORIENTED: 1888 a->roworiented = flg; 1889 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1890 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1891 break; 1892 case MAT_NEW_DIAGONALS: 1893 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1894 break; 1895 case MAT_IGNORE_OFF_PROC_ENTRIES: 1896 a->donotstash = flg; 1897 break; 1898 case MAT_SPD: 1899 A->spd_set = PETSC_TRUE; 1900 A->spd = flg; 1901 if (flg) { 1902 A->symmetric = PETSC_TRUE; 1903 A->structurally_symmetric = PETSC_TRUE; 1904 A->symmetric_set = PETSC_TRUE; 1905 A->structurally_symmetric_set = PETSC_TRUE; 1906 } 1907 break; 1908 case MAT_SYMMETRIC: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 case MAT_STRUCTURALLY_SYMMETRIC: 1912 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1913 break; 1914 case MAT_HERMITIAN: 1915 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1916 break; 1917 case MAT_SYMMETRY_ETERNAL: 1918 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1919 break; 1920 default: 1921 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1922 } 1923 PetscFunctionReturn(0); 1924 } 1925 1926 #undef __FUNCT__ 1927 #define __FUNCT__ "MatGetRow_MPIAIJ" 1928 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1929 { 1930 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1931 PetscScalar 
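/*
   MatGetRow on an MPIAIJ matrix must present the row in increasing global
   column order, merging the diagonal block (local columns shifted by cstart)
   with the off-diagonal block (columns mapped through garray); the imark
   logic below finds where the off-diagonal entries split around the diagonal
   block. Standard usage, valid only for locally owned rows:

      const PetscInt    *cols;
      const PetscScalar *vals;
      PetscInt          ncols;
      ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      -- ... use cols[] and vals[] ...
      ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/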
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1932 PetscErrorCode ierr; 1933 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1934 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1935 PetscInt *cmap,*idx_p; 1936 1937 PetscFunctionBegin; 1938 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1939 mat->getrowactive = PETSC_TRUE; 1940 1941 if (!mat->rowvalues && (idx || v)) { 1942 /* 1943 allocate enough space to hold information from the longest row. 1944 */ 1945 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1946 PetscInt max = 1,tmp; 1947 for (i=0; i<matin->rmap->n; i++) { 1948 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1949 if (max < tmp) { max = tmp; } 1950 } 1951 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1952 } 1953 1954 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1955 lrow = row - rstart; 1956 1957 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1958 if (!v) {pvA = 0; pvB = 0;} 1959 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1960 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1961 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1962 nztot = nzA + nzB; 1963 1964 cmap = mat->garray; 1965 if (v || idx) { 1966 if (nztot) { 1967 /* Sort by increasing column numbers, assuming A and B already sorted */ 1968 PetscInt imark = -1; 1969 if (v) { 1970 *v = v_p = mat->rowvalues; 1971 for (i=0; i<nzB; i++) { 1972 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1973 else break; 1974 } 1975 imark = i; 1976 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1977 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1978 } 1979 if (idx) { 1980 *idx = idx_p = mat->rowindices; 1981 if (imark > -1) { 1982 for (i=0; i<imark; i++) { 1983 idx_p[i] = cmap[cworkB[i]]; 1984 } 1985 } else { 1986 for (i=0; i<nzB; i++) { 1987 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1988 else break; 1989 } 1990 imark = i; 1991 } 1992 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1993 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1994 } 1995 } else { 1996 if (idx) *idx = 0; 1997 if (v) *v = 0; 1998 } 1999 } 2000 *nz = nztot; 2001 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 2002 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 2003 PetscFunctionReturn(0); 2004 } 2005 2006 #undef __FUNCT__ 2007 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 2008 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2009 { 2010 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2011 2012 PetscFunctionBegin; 2013 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2014 aij->getrowactive = PETSC_FALSE; 2015 PetscFunctionReturn(0); 2016 } 2017 2018 #undef __FUNCT__ 2019 #define __FUNCT__ "MatNorm_MPIAIJ" 2020 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2021 { 2022 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2023 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2024 PetscErrorCode ierr; 2025 PetscInt i,j,cstart = mat->cmap->rstart; 2026 PetscReal sum = 0.0; 2027 MatScalar *v; 2028 2029 PetscFunctionBegin; 2030 if (aij->size == 1) { 2031 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2032 } else { 2033 if (type == NORM_FROBENIUS) { 2034 
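/*
   Supported norms: NORM_FROBENIUS is sqrt(sum_ij |a_ij|^2), accumulated over
   both blocks and combined with an MPI sum; NORM_1 (max column sum) needs a
   length-N work array because each column is spread over many processes;
   NORM_INFINITY (max row sum) reduces with MPIU_MAX since whole rows are
   local. The spectral norm NORM_2 is not supported for matrices. Sketch:

      PetscReal nrm;
      ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/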
v = amat->a; 2035 for (i=0; i<amat->nz; i++) { 2036 #if defined(PETSC_USE_COMPLEX) 2037 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2038 #else 2039 sum += (*v)*(*v); v++; 2040 #endif 2041 } 2042 v = bmat->a; 2043 for (i=0; i<bmat->nz; i++) { 2044 #if defined(PETSC_USE_COMPLEX) 2045 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2046 #else 2047 sum += (*v)*(*v); v++; 2048 #endif 2049 } 2050 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2051 *norm = PetscSqrtReal(*norm); 2052 } else if (type == NORM_1) { /* max column norm */ 2053 PetscReal *tmp,*tmp2; 2054 PetscInt *jj,*garray = aij->garray; 2055 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2056 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2057 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2058 *norm = 0.0; 2059 v = amat->a; jj = amat->j; 2060 for (j=0; j<amat->nz; j++) { 2061 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 2062 } 2063 v = bmat->a; jj = bmat->j; 2064 for (j=0; j<bmat->nz; j++) { 2065 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2066 } 2067 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2068 for (j=0; j<mat->cmap->N; j++) { 2069 if (tmp2[j] > *norm) *norm = tmp2[j]; 2070 } 2071 ierr = PetscFree(tmp);CHKERRQ(ierr); 2072 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2073 } else if (type == NORM_INFINITY) { /* max row norm */ 2074 PetscReal ntemp = 0.0; 2075 for (j=0; j<aij->A->rmap->n; j++) { 2076 v = amat->a + amat->i[j]; 2077 sum = 0.0; 2078 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2079 sum += PetscAbsScalar(*v); v++; 2080 } 2081 v = bmat->a + bmat->i[j]; 2082 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2083 sum += PetscAbsScalar(*v); v++; 2084 } 2085 if (sum > ntemp) ntemp = sum; 2086 } 2087 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2088 } else { 2089 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2090 } 2091 } 2092 PetscFunctionReturn(0); 2093 } 2094 2095 #undef __FUNCT__ 2096 #define __FUNCT__ "MatTranspose_MPIAIJ" 2097 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2098 { 2099 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2100 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2101 PetscErrorCode ierr; 2102 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2103 PetscInt cstart=A->cmap->rstart,ncol; 2104 Mat B; 2105 MatScalar *array; 2106 2107 PetscFunctionBegin; 2108 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2109 2110 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2111 ai = Aloc->i; aj = Aloc->j; 2112 bi = Bloc->i; bj = Bloc->j; 2113 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2114 PetscInt *d_nnz,*g_nnz,*o_nnz; 2115 PetscSFNode *oloc; 2116 PETSC_UNUSED PetscSF sf; 2117 2118 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2119 /* compute d_nnz for preallocation */ 2120 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2121 for (i=0; i<ai[ma]; i++) { 2122 d_nnz[aj[i]] ++; 2123 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2124 } 2125 #if defined(OMPI_MAJOR_VERSION) /* Open MPI Bug. 
https://bitbucket.org/petsc/petsc-dev/issue/9/implement-petscsf-without-one-sided */ 2126 ierr = PetscMemcpy(o_nnz,d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2127 #else 2128 /* compute local off-diagonal contributions */ 2129 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2130 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2131 /* map those to global */ 2132 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 2133 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2134 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2135 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2136 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2137 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2138 #endif 2139 2140 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2141 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2142 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2143 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2144 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2145 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2146 #if defined(OMPI_MAJOR_VERSION) 2147 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2148 #endif 2149 } else { 2150 B = *matout; 2151 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2152 for (i=0; i<ai[ma]; i++){ 2153 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2154 } 2155 } 2156 2157 /* copy over the A part */ 2158 array = Aloc->a; 2159 row = A->rmap->rstart; 2160 for (i=0; i<ma; i++) { 2161 ncol = ai[i+1]-ai[i]; 2162 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2163 row++; array += ncol; aj += ncol; 2164 } 2165 aj = Aloc->j; 2166 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2167 2168 /* copy over the B part */ 2169 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2170 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2171 array = Bloc->a; 2172 row = A->rmap->rstart; 2173 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2174 cols_tmp = cols; 2175 for (i=0; i<mb; i++) { 2176 ncol = bi[i+1]-bi[i]; 2177 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2178 row++; array += ncol; cols_tmp += ncol; 2179 } 2180 ierr = PetscFree(cols);CHKERRQ(ierr); 2181 2182 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2183 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2184 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2185 *matout = B; 2186 } else { 2187 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2188 } 2189 PetscFunctionReturn(0); 2190 } 2191 2192 #undef __FUNCT__ 2193 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2194 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2195 { 2196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2197 Mat a = aij->A,b = aij->B; 2198 PetscErrorCode ierr; 2199 PetscInt s1,s2,s3; 2200 2201 PetscFunctionBegin; 2202 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2203 if (rr) { 2204 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2205 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2206 /* Overlap communication with computation. 
*/ 2207 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2208 } 2209 if (ll) { 2210 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2211 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2212 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2213 } 2214 /* scale the diagonal block */ 2215 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2216 2217 if (rr) { 2218 /* Do a scatter end and then right scale the off-diagonal block */ 2219 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2220 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2221 } 2222 2223 PetscFunctionReturn(0); 2224 } 2225 2226 #undef __FUNCT__ 2227 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2228 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2229 { 2230 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2231 PetscErrorCode ierr; 2232 2233 PetscFunctionBegin; 2234 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2235 PetscFunctionReturn(0); 2236 } 2237 2238 #undef __FUNCT__ 2239 #define __FUNCT__ "MatEqual_MPIAIJ" 2240 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2241 { 2242 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2243 Mat a,b,c,d; 2244 PetscBool flg; 2245 PetscErrorCode ierr; 2246 2247 PetscFunctionBegin; 2248 a = matA->A; b = matA->B; 2249 c = matB->A; d = matB->B; 2250 2251 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2252 if (flg) { 2253 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2254 } 2255 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2256 PetscFunctionReturn(0); 2257 } 2258 2259 #undef __FUNCT__ 2260 #define __FUNCT__ "MatCopy_MPIAIJ" 2261 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2262 { 2263 PetscErrorCode ierr; 2264 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2265 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2266 2267 PetscFunctionBegin; 2268 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2269 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2270 /* because of the column compression in the off-processor part of the matrix a->B, 2271 the number of columns in a->B and b->B may be different, hence we cannot call 2272 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2273 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2274 then copying the submatrices */ 2275 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2276 } else { 2277 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2278 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2279 } 2280 PetscFunctionReturn(0); 2281 } 2282 2283 #undef __FUNCT__ 2284 #define __FUNCT__ "MatSetUp_MPIAIJ" 2285 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2286 { 2287 PetscErrorCode ierr; 2288 2289 PetscFunctionBegin; 2290 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2296 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2297 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2298 { 2299 PetscInt i,m=Y->rmap->N; 2300 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2301 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2302 const PetscInt *xi = x->i,*yi = y->i; 2303 2304 PetscFunctionBegin; 2305 /* Set the number of nonzeros in the new matrix */ 2306 for (i=0; i<m; i++) { 2307 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2308 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2309 nnz[i] = 0; 2310 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2311 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2312 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2313 nnz[i]++; 2314 } 2315 for (; k<nzy; k++) nnz[i]++; 2316 } 2317 PetscFunctionReturn(0); 2318 } 2319 2320 #undef __FUNCT__ 2321 #define __FUNCT__ "MatAXPY_MPIAIJ" 2322 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2323 { 2324 PetscErrorCode ierr; 2325 PetscInt i; 2326 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2327 PetscBLASInt bnz,one=1; 2328 Mat_SeqAIJ *x,*y; 2329 2330 PetscFunctionBegin; 2331 if (str == SAME_NONZERO_PATTERN) { 2332 PetscScalar alpha = a; 2333 x = (Mat_SeqAIJ *)xx->A->data; 2334 y = (Mat_SeqAIJ *)yy->A->data; 2335 bnz = PetscBLASIntCast(x->nz); 2336 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2337 x = (Mat_SeqAIJ *)xx->B->data; 2338 y = (Mat_SeqAIJ *)yy->B->data; 2339 bnz = PetscBLASIntCast(x->nz); 2340 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2341 } else if (str == SUBSET_NONZERO_PATTERN) { 2342 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2343 2344 x = (Mat_SeqAIJ *)xx->B->data; 2345 y = (Mat_SeqAIJ *)yy->B->data; 2346 if (y->xtoy && y->XtoY != xx->B) { 2347 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2348 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2349 } 2350 if (!y->xtoy) { /* get xtoy */ 2351 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2352 y->XtoY = xx->B; 2353 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2354 } 2355 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2356 } else { 2357 Mat B; 2358 PetscInt *nnz_d,*nnz_o; 2359 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2360 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2361 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2362 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2363 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2364 ierr = 
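/*
   MatAXPY (Y = Y + a*X) has three regimes, dispatched on str above:
   SAME_NONZERO_PATTERN is one BLAS axpy per block; SUBSET_NONZERO_PATTERN
   scatters X's values through the cached y->xtoy map; anything else (this
   branch) builds a fresh matrix preallocated for the union of the two
   patterns and copies both in. Sketch:

      ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/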
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2365     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2366     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2367     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2368     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2369     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2370     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2371     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2372     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2373   }
2374   PetscFunctionReturn(0);
2375 }
2376
2377 extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2378
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatConjugate_MPIAIJ"
2381 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2382 {
2383 #if defined(PETSC_USE_COMPLEX)
2384   PetscErrorCode ierr;
2385   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2386
2387   PetscFunctionBegin;
2388   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2389   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2390 #else
2391   PetscFunctionBegin;
2392 #endif
2393   PetscFunctionReturn(0);
2394 }
2395
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatRealPart_MPIAIJ"
2398 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2399 {
2400   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2401   PetscErrorCode ierr;
2402
2403   PetscFunctionBegin;
2404   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2405   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2406   PetscFunctionReturn(0);
2407 }
2408
2409 #undef __FUNCT__
2410 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2411 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2412 {
2413   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2414   PetscErrorCode ierr;
2415
2416   PetscFunctionBegin;
2417   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2418   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2419   PetscFunctionReturn(0);
2420 }
2421
2422 #ifdef PETSC_HAVE_PBGL
2423
2424 #include <boost/parallel/mpi/bsp_process_group.hpp>
2425 #include <boost/graph/distributed/ilu_default_graph.hpp>
2426 #include <boost/graph/distributed/ilu_0_block.hpp>
2427 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2428 #include <boost/graph/distributed/petsc/interface.hpp>
2429 #include <boost/multi_array.hpp>
2430 #include <boost/parallel/distributed_property_map.hpp>
2431
2432 #undef __FUNCT__
2433 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2434 /*
2435   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2436 */
2437 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2438 {
2439   namespace petsc = boost::distributed::petsc;
2440
2441   namespace graph_dist = boost::graph::distributed;
2442   using boost::graph::distributed::ilu_default::process_group_type;
2443   using boost::graph::ilu_permuted;
2444
2445   PetscBool      row_identity, col_identity;
2446   PetscContainer c;
2447   PetscInt       m, n, M, N;
2448   PetscErrorCode ierr;
2449
2450   PetscFunctionBegin;
2451   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2452   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2453   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2454   if (!row_identity || !col_identity) {
2455     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2456   }
2457
2458   process_group_type pg;
2459   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2460   lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg,
petsc::matrix_distribution(A, pg)); 2461 lgraph_type& level_graph = *lgraph_p; 2462 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2463 2464 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2465 ilu_permuted(level_graph); 2466 2467 /* put together the new matrix */ 2468 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2469 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2470 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2471 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2472 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2473 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2474 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2475 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2476 2477 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2478 ierr = PetscContainerSetPointer(c, lgraph_p); 2479 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2480 ierr = PetscContainerDestroy(&c); 2481 PetscFunctionReturn(0); 2482 } 2483 2484 #undef __FUNCT__ 2485 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2486 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2487 { 2488 PetscFunctionBegin; 2489 PetscFunctionReturn(0); 2490 } 2491 2492 #undef __FUNCT__ 2493 #define __FUNCT__ "MatSolve_MPIAIJ" 2494 /* 2495 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2496 */ 2497 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2498 { 2499 namespace graph_dist = boost::graph::distributed; 2500 2501 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2502 lgraph_type* lgraph_p; 2503 PetscContainer c; 2504 PetscErrorCode ierr; 2505 2506 PetscFunctionBegin; 2507 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2508 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2509 ierr = VecCopy(b, x);CHKERRQ(ierr); 2510 2511 PetscScalar* array_x; 2512 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2513 PetscInt sx; 2514 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2515 2516 PetscScalar* array_b; 2517 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2518 PetscInt sb; 2519 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2520 2521 lgraph_type& level_graph = *lgraph_p; 2522 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2523 2524 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2525 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2526 ref_x(array_x, boost::extents[num_vertices(graph)]); 2527 2528 typedef boost::iterator_property_map<array_ref_type::iterator, 2529 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2530 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2531 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2532 2533 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2534 2535 PetscFunctionReturn(0); 2536 } 2537 #endif 2538 2539 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2540 PetscInt nzlocal,nsends,nrecvs; 2541 PetscMPIInt *send_rank,*recv_rank; 2542 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2543 PetscScalar *sbuf_a,**rbuf_a; 2544 PetscErrorCode (*Destroy)(Mat); 2545 } Mat_Redundant; 2546 2547 #undef __FUNCT__ 2548 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2549 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2550 { 2551 PetscErrorCode ierr; 2552 Mat_Redundant 
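/*
   Mat_Redundant (above) caches the communication setup from the first
   MatGetRedundantMatrix_MPIAIJ() call; it is attached to the new matrix via a
   PetscContainer so a later MAT_REUSE_MATRIX call can skip the setup, and
   this destroy routine runs when the container is released. The generic
   attach idiom, with MyDestroy standing in for the user destroy routine:

      ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
      ierr = PetscContainerSetPointer(container,ptr);CHKERRQ(ierr);
      ierr = PetscContainerSetUserDestroy(container,MyDestroy);CHKERRQ(ierr);
      ierr = PetscObjectCompose((PetscObject)obj,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
      ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
*/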
*redund=(Mat_Redundant*)ptr;
2553   PetscInt       i;
2554
2555   PetscFunctionBegin;
2556   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2557   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2558   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2559   for (i=0; i<redund->nrecvs; i++){
2560     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2561     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2562   }
2563   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2564   ierr = PetscFree(redund);CHKERRQ(ierr);
2565   PetscFunctionReturn(0);
2566 }
2567
2568 #undef __FUNCT__
2569 #define __FUNCT__ "MatDestroy_MatRedundant"
2570 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2571 {
2572   PetscErrorCode ierr;
2573   PetscContainer container;
2574   Mat_Redundant  *redund=PETSC_NULL;
2575
2576   PetscFunctionBegin;
2577   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2578   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2579   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2580   A->ops->destroy = redund->Destroy;
2581   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2582   if (A->ops->destroy) {
2583     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2584   }
2585   PetscFunctionReturn(0);
2586 }
2587
2588 #undef __FUNCT__
2589 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2590 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2591 {
2592   PetscMPIInt    rank,size;
2593   MPI_Comm       comm=((PetscObject)mat)->comm;
2594   PetscErrorCode ierr;
2595   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2596   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2597   PetscInt       *rowrange=mat->rmap->range;
2598   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2599   Mat            A=aij->A,B=aij->B,C=*matredundant;
2600   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2601   PetscScalar    *sbuf_a;
2602   PetscInt       nzlocal=a->nz+b->nz;
2603   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2604   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2605   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2606   MatScalar      *aworkA,*aworkB;
2607   PetscScalar    *vals;
2608   PetscMPIInt    tag1,tag2,tag3,imdex;
2609   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2610                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2611   MPI_Status     recv_status,*send_status;
2612   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2613   PetscInt       **rbuf_j=PETSC_NULL;
2614   PetscScalar    **rbuf_a=PETSC_NULL;
2615   Mat_Redundant  *redund=PETSC_NULL;
2616   PetscContainer container;
2617
2618   PetscFunctionBegin;
2619   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2620   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2621
2622   if (reuse == MAT_REUSE_MATRIX) {
2623     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2624     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2625     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2626     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
Wrong local size"); 2627 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2628 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2629 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2630 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2631 2632 nsends = redund->nsends; 2633 nrecvs = redund->nrecvs; 2634 send_rank = redund->send_rank; 2635 recv_rank = redund->recv_rank; 2636 sbuf_nz = redund->sbuf_nz; 2637 rbuf_nz = redund->rbuf_nz; 2638 sbuf_j = redund->sbuf_j; 2639 sbuf_a = redund->sbuf_a; 2640 rbuf_j = redund->rbuf_j; 2641 rbuf_a = redund->rbuf_a; 2642 } 2643 2644 if (reuse == MAT_INITIAL_MATRIX){ 2645 PetscMPIInt subrank,subsize; 2646 PetscInt nleftover,np_subcomm; 2647 /* get the destination processors' id send_rank, nsends and nrecvs */ 2648 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2649 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2650 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2651 np_subcomm = size/nsubcomm; 2652 nleftover = size - nsubcomm*np_subcomm; 2653 nsends = 0; nrecvs = 0; 2654 for (i=0; i<size; i++){ /* i=rank*/ 2655 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2656 send_rank[nsends] = i; nsends++; 2657 recv_rank[nrecvs++] = i; 2658 } 2659 } 2660 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2661 i = size-nleftover-1; 2662 j = 0; 2663 while (j < nsubcomm - nleftover){ 2664 send_rank[nsends++] = i; 2665 i--; j++; 2666 } 2667 } 2668 2669 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2670 for (i=0; i<nleftover; i++){ 2671 recv_rank[nrecvs++] = size-nleftover+i; 2672 } 2673 } 2674 2675 /* allocate sbuf_j, sbuf_a */ 2676 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2677 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2678 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2679 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2680 2681 /* copy mat's local entries into the buffers */ 2682 if (reuse == MAT_INITIAL_MATRIX){ 2683 rownz_max = 0; 2684 rptr = sbuf_j; 2685 cols = sbuf_j + rend-rstart + 1; 2686 vals = sbuf_a; 2687 rptr[0] = 0; 2688 for (i=0; i<rend-rstart; i++){ 2689 row = i + rstart; 2690 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2691 ncols = nzA + nzB; 2692 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2693 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2694 /* load the column indices for this row into cols */ 2695 lwrite = 0; 2696 for (l=0; l<nzB; l++) { 2697 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2698 vals[lwrite] = aworkB[l]; 2699 cols[lwrite++] = ctmp; 2700 } 2701 } 2702 for (l=0; l<nzA; l++){ 2703 vals[lwrite] = aworkA[l]; 2704 cols[lwrite++] = cstart + cworkA[l]; 2705 } 2706 for (l=0; l<nzB; l++) { 2707 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2708 vals[lwrite] = aworkB[l]; 2709 cols[lwrite++] = ctmp; 2710 } 2711 } 2712 vals += ncols; 2713 cols += ncols; 2714 rptr[i+1] = rptr[i] + ncols; 2715 if (rownz_max < ncols) rownz_max = ncols; 2716 } 2717 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2718 } else { /* only copy matrix values into sbuf_a */ 2719 rptr = sbuf_j; 2720 vals = sbuf_a; 2721 rptr[0] = 0; 2722 for (i=0; i<rend-rstart; i++){ 
2723 row = i + rstart; 2724 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2725 ncols = nzA + nzB; 2726 cworkB = b->j + b->i[i]; 2727 aworkA = a->a + a->i[i]; 2728 aworkB = b->a + b->i[i]; 2729 lwrite = 0; 2730 for (l=0; l<nzB; l++) { 2731 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2732 } 2733 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2734 for (l=0; l<nzB; l++) { 2735 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2736 } 2737 vals += ncols; 2738 rptr[i+1] = rptr[i] + ncols; 2739 } 2740 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2741 2742 /* send nzlocal to others, and recv other's nzlocal */ 2743 /*--------------------------------------------------*/ 2744 if (reuse == MAT_INITIAL_MATRIX){ 2745 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2746 s_waits2 = s_waits3 + nsends; 2747 s_waits1 = s_waits2 + nsends; 2748 r_waits1 = s_waits1 + nsends; 2749 r_waits2 = r_waits1 + nrecvs; 2750 r_waits3 = r_waits2 + nrecvs; 2751 } else { 2752 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2753 r_waits3 = s_waits3 + nsends; 2754 } 2755 2756 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2757 if (reuse == MAT_INITIAL_MATRIX){ 2758 /* get new tags to keep the communication clean */ 2759 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2760 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2761 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2762 2763 /* post receives of other's nzlocal */ 2764 for (i=0; i<nrecvs; i++){ 2765 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2766 } 2767 /* send nzlocal to others */ 2768 for (i=0; i<nsends; i++){ 2769 sbuf_nz[i] = nzlocal; 2770 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2771 } 2772 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2773 count = nrecvs; 2774 while (count) { 2775 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2776 recv_rank[imdex] = recv_status.MPI_SOURCE; 2777 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2778 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2779 2780 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2781 rbuf_nz[imdex] += i + 2; 2782 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2783 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2784 count--; 2785 } 2786 /* wait on sends of nzlocal */ 2787 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2788 /* send mat->i,j to others, and recv from other's */ 2789 /*------------------------------------------------*/ 2790 for (i=0; i<nsends; i++){ 2791 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2792 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2793 } 2794 /* wait on receives of mat->i,j */ 2795 /*------------------------------*/ 2796 count = nrecvs; 2797 while (count) { 2798 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2799 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != 
MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2800 count--; 2801 } 2802 /* wait on sends of mat->i,j */ 2803 /*---------------------------*/ 2804 if (nsends) { 2805 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2806 } 2807 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2808 2809 /* post receives, send and receive mat->a */ 2810 /*----------------------------------------*/ 2811 for (imdex=0; imdex<nrecvs; imdex++) { 2812 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2813 } 2814 for (i=0; i<nsends; i++){ 2815 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2816 } 2817 count = nrecvs; 2818 while (count) { 2819 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2820 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2821 count--; 2822 } 2823 if (nsends) { 2824 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2825 } 2826 2827 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2828 2829 /* create redundant matrix */ 2830 /*-------------------------*/ 2831 if (reuse == MAT_INITIAL_MATRIX){ 2832 /* compute rownz_max for preallocation */ 2833 for (imdex=0; imdex<nrecvs; imdex++){ 2834 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2835 rptr = rbuf_j[imdex]; 2836 for (i=0; i<j; i++){ 2837 ncols = rptr[i+1] - rptr[i]; 2838 if (rownz_max < ncols) rownz_max = ncols; 2839 } 2840 } 2841 2842 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2843 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2844 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2845 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2846 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2847 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2848 } else { 2849 C = *matredundant; 2850 } 2851 2852 /* insert local matrix entries */ 2853 rptr = sbuf_j; 2854 cols = sbuf_j + rend-rstart + 1; 2855 vals = sbuf_a; 2856 for (i=0; i<rend-rstart; i++){ 2857 row = i + rstart; 2858 ncols = rptr[i+1] - rptr[i]; 2859 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2860 vals += ncols; 2861 cols += ncols; 2862 } 2863 /* insert received matrix entries */ 2864 for (imdex=0; imdex<nrecvs; imdex++){ 2865 rstart = rowrange[recv_rank[imdex]]; 2866 rend = rowrange[recv_rank[imdex]+1]; 2867 rptr = rbuf_j[imdex]; 2868 cols = rbuf_j[imdex] + rend-rstart + 1; 2869 vals = rbuf_a[imdex]; 2870 for (i=0; i<rend-rstart; i++){ 2871 row = i + rstart; 2872 ncols = rptr[i+1] - rptr[i]; 2873 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2874 vals += ncols; 2875 cols += ncols; 2876 } 2877 } 2878 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2879 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2880 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2881 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2882 if (reuse == MAT_INITIAL_MATRIX) { 2883 PetscContainer container; 2884 *matredundant = C; 2885 /* create a supporting struct and attach it to C for reuse */ 2886 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2887 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2888 ierr 
= PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2889     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2890     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2891     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2892
2893     redund->nzlocal   = nzlocal;
2894     redund->nsends    = nsends;
2895     redund->nrecvs    = nrecvs;
2896     redund->send_rank = send_rank;
2897     redund->recv_rank = recv_rank;
2898     redund->sbuf_nz   = sbuf_nz;
2899     redund->rbuf_nz   = rbuf_nz;
2900     redund->sbuf_j    = sbuf_j;
2901     redund->sbuf_a    = sbuf_a;
2902     redund->rbuf_j    = rbuf_j;
2903     redund->rbuf_a    = rbuf_a;
2904
2905     redund->Destroy = C->ops->destroy;
2906     C->ops->destroy = MatDestroy_MatRedundant;
2907   }
2908   PetscFunctionReturn(0);
2909 }
2910
2911 #undef __FUNCT__
2912 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2913 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2914 {
2915   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2916   PetscErrorCode ierr;
2917   PetscInt       i,*idxb = 0;
2918   PetscScalar    *va,*vb;
2919   Vec            vtmp;
2920
2921   PetscFunctionBegin;
2922   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2923   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2924   if (idx) {
2925     for (i=0; i<A->rmap->n; i++) {
2926       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2927     }
2928   }
2929
2930   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2931   if (idx) {
2932     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2933   }
2934   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2935   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2936
2937   for (i=0; i<A->rmap->n; i++){
2938     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2939       va[i] = vb[i];
2940       if (idx) idx[i] = a->garray[idxb[i]];
2941     }
2942   }
2943
2944   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2945   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2946   ierr = PetscFree(idxb);CHKERRQ(ierr);
2947   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2948   PetscFunctionReturn(0);
2949 }
2950
2951 #undef __FUNCT__
2952 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2953 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2954 {
2955   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2956   PetscErrorCode ierr;
2957   PetscInt       i,*idxb = 0;
2958   PetscScalar    *va,*vb;
2959   Vec            vtmp;
2960
2961   PetscFunctionBegin;
2962   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2963   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2964   if (idx) {
2965     for (i=0; i<A->rmap->n; i++) {
2966       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2967     }
2968   }
2969
2970   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2971   if (idx) {
2972     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2973   }
2974   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2975   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2976
2977   for (i=0; i<A->rmap->n; i++){
2978     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2979       va[i] = vb[i];
2980       if (idx) idx[i] = a->garray[idxb[i]];
2981     }
2982   }
2983
2984   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2985   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2986   ierr = PetscFree(idxb);CHKERRQ(ierr);
2987   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2988   PetscFunctionReturn(0);
2989 }
2990
2991 #undef __FUNCT__
2992 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2993 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2994 {
2995   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2996   PetscInt   n    = A->rmap->n;
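  /*
     MatGetRowMin/MatGetRowMax on MPIAIJ are computed blockwise: once on the
     diagonal block mat->A and once on the off-diagonal block mat->B (both
     sequential), then merged row by row, translating the winning column index
     to global numbering (cstart + local for A, garray[] for B). As in the
     code below, the comparison is on magnitudes. Minimal user-level sketch,
     with v taken from the row (left) layout:

        Vec      v;
        PetscInt *idx;
        ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idx);CHKERRQ(ierr);
        ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr);
        ierr = MatGetRowMin(A,v,idx);CHKERRQ(ierr);
  */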
A->cmap->rstart;
2998 PetscInt *cmap = mat->garray;
2999 PetscInt *diagIdx, *offdiagIdx;
3000 Vec diagV, offdiagV;
3001 PetscScalar *a, *diagA, *offdiagA;
3002 PetscInt r;
3003 PetscErrorCode ierr;
3004
3005 PetscFunctionBegin;
3006 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3007 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3008 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3009 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
3010 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3011 ierr = VecGetArray(v, &a);CHKERRQ(ierr);
3012 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
3013 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3014 for (r = 0; r < n; ++r) {
3015 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3016 a[r] = diagA[r];
3017 idx[r] = cstart + diagIdx[r];
3018 } else {
3019 a[r] = offdiagA[r];
3020 idx[r] = cmap[offdiagIdx[r]];
3021 }
3022 }
3023 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
3024 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
3025 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3026 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3027 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3028 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3029 PetscFunctionReturn(0);
3030 }
3031
3032 #undef __FUNCT__
3033 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3034 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3035 {
3036 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
3037 PetscInt n = A->rmap->n;
3038 PetscInt cstart = A->cmap->rstart;
3039 PetscInt *cmap = mat->garray;
3040 PetscInt *diagIdx, *offdiagIdx;
3041 Vec diagV, offdiagV;
3042 PetscScalar *a, *diagA, *offdiagA;
3043 PetscInt r;
3044 PetscErrorCode ierr;
3045
3046 PetscFunctionBegin;
3047 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3048 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3049 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3050 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
3051 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3052 ierr = VecGetArray(v, &a);CHKERRQ(ierr);
3053 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
3054 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3055 for (r = 0; r < n; ++r) {
3056 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3057 a[r] = diagA[r];
3058 idx[r] = cstart + diagIdx[r];
3059 } else {
3060 a[r] = offdiagA[r];
3061 idx[r] = cmap[offdiagIdx[r]];
3062 }
3063 }
3064 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
3065 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
3066 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3067 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3068 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3069 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3070 PetscFunctionReturn(0);
3071 }
3072
3073 #undef __FUNCT__
3074 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3075 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3076 {
3077 PetscErrorCode ierr;
3078 Mat *dummy;
3079
3080 PetscFunctionBegin;
3081 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3082 *newmat = *dummy;
3083 ierr = PetscFree(dummy);CHKERRQ(ierr);
3084 PetscFunctionReturn(0);
3085 }
3086
3087 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3088
3089 #undef __FUNCT__
3090 #define
__FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3091 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3092 { 3093 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3094 PetscErrorCode ierr; 3095 3096 PetscFunctionBegin; 3097 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3098 PetscFunctionReturn(0); 3099 } 3100 3101 #undef __FUNCT__ 3102 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3103 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3104 { 3105 PetscErrorCode ierr; 3106 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3107 3108 PetscFunctionBegin; 3109 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3110 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3111 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3112 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3113 PetscFunctionReturn(0); 3114 } 3115 3116 /* -------------------------------------------------------------------*/ 3117 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3118 MatGetRow_MPIAIJ, 3119 MatRestoreRow_MPIAIJ, 3120 MatMult_MPIAIJ, 3121 /* 4*/ MatMultAdd_MPIAIJ, 3122 MatMultTranspose_MPIAIJ, 3123 MatMultTransposeAdd_MPIAIJ, 3124 #ifdef PETSC_HAVE_PBGL 3125 MatSolve_MPIAIJ, 3126 #else 3127 0, 3128 #endif 3129 0, 3130 0, 3131 /*10*/ 0, 3132 0, 3133 0, 3134 MatSOR_MPIAIJ, 3135 MatTranspose_MPIAIJ, 3136 /*15*/ MatGetInfo_MPIAIJ, 3137 MatEqual_MPIAIJ, 3138 MatGetDiagonal_MPIAIJ, 3139 MatDiagonalScale_MPIAIJ, 3140 MatNorm_MPIAIJ, 3141 /*20*/ MatAssemblyBegin_MPIAIJ, 3142 MatAssemblyEnd_MPIAIJ, 3143 MatSetOption_MPIAIJ, 3144 MatZeroEntries_MPIAIJ, 3145 /*24*/ MatZeroRows_MPIAIJ, 3146 0, 3147 #ifdef PETSC_HAVE_PBGL 3148 0, 3149 #else 3150 0, 3151 #endif 3152 0, 3153 0, 3154 /*29*/ MatSetUp_MPIAIJ, 3155 #ifdef PETSC_HAVE_PBGL 3156 0, 3157 #else 3158 0, 3159 #endif 3160 0, 3161 0, 3162 0, 3163 /*34*/ MatDuplicate_MPIAIJ, 3164 0, 3165 0, 3166 0, 3167 0, 3168 /*39*/ MatAXPY_MPIAIJ, 3169 MatGetSubMatrices_MPIAIJ, 3170 MatIncreaseOverlap_MPIAIJ, 3171 MatGetValues_MPIAIJ, 3172 MatCopy_MPIAIJ, 3173 /*44*/ MatGetRowMax_MPIAIJ, 3174 MatScale_MPIAIJ, 3175 0, 3176 0, 3177 MatZeroRowsColumns_MPIAIJ, 3178 /*49*/ MatSetRandom_MPIAIJ, 3179 0, 3180 0, 3181 0, 3182 0, 3183 /*54*/ MatFDColoringCreate_MPIAIJ, 3184 0, 3185 MatSetUnfactored_MPIAIJ, 3186 MatPermute_MPIAIJ, 3187 0, 3188 /*59*/ MatGetSubMatrix_MPIAIJ, 3189 MatDestroy_MPIAIJ, 3190 MatView_MPIAIJ, 3191 0, 3192 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3193 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3194 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3195 0, 3196 0, 3197 0, 3198 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3199 MatGetRowMinAbs_MPIAIJ, 3200 0, 3201 MatSetColoring_MPIAIJ, 3202 #if defined(PETSC_HAVE_ADIC) 3203 MatSetValuesAdic_MPIAIJ, 3204 #else 3205 0, 3206 #endif 3207 MatSetValuesAdifor_MPIAIJ, 3208 /*75*/ MatFDColoringApply_AIJ, 3209 0, 3210 0, 3211 0, 3212 MatFindZeroDiagonals_MPIAIJ, 3213 /*80*/ 0, 3214 0, 3215 0, 3216 /*83*/ MatLoad_MPIAIJ, 3217 0, 3218 0, 3219 0, 3220 0, 3221 0, 3222 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3223 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3224 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3225 MatPtAP_MPIAIJ_MPIAIJ, 3226 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3227 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3228 0, 3229 0, 3230 0, 3231 0, 3232 /*99*/ 0, 3233 0, 3234 0, 3235 MatConjugate_MPIAIJ, 3236 0, 3237 /*104*/MatSetValuesRow_MPIAIJ, 3238 MatRealPart_MPIAIJ, 3239 MatImaginaryPart_MPIAIJ, 3240 0, 3241 0, 3242 /*109*/0, 3243 MatGetRedundantMatrix_MPIAIJ, 3244 MatGetRowMin_MPIAIJ, 3245 0, 3246 0, 3247 
/*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3248 0, 3249 0, 3250 0, 3251 0, 3252 /*119*/0, 3253 0, 3254 0, 3255 0, 3256 MatGetMultiProcBlock_MPIAIJ, 3257 /*124*/MatFindNonzeroRows_MPIAIJ, 3258 MatGetColumnNorms_MPIAIJ, 3259 MatInvertBlockDiagonal_MPIAIJ, 3260 0, 3261 MatGetSubMatricesParallel_MPIAIJ, 3262 /*129*/0, 3263 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3264 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3265 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3266 0, 3267 /*134*/0, 3268 0, 3269 0, 3270 0, 3271 0 3272 }; 3273 3274 /* ----------------------------------------------------------------------------------------*/ 3275 3276 EXTERN_C_BEGIN 3277 #undef __FUNCT__ 3278 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3279 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3280 { 3281 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3282 PetscErrorCode ierr; 3283 3284 PetscFunctionBegin; 3285 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3286 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3287 PetscFunctionReturn(0); 3288 } 3289 EXTERN_C_END 3290 3291 EXTERN_C_BEGIN 3292 #undef __FUNCT__ 3293 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3294 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3295 { 3296 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3297 PetscErrorCode ierr; 3298 3299 PetscFunctionBegin; 3300 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3301 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3302 PetscFunctionReturn(0); 3303 } 3304 EXTERN_C_END 3305 3306 EXTERN_C_BEGIN 3307 #undef __FUNCT__ 3308 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3309 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3310 { 3311 Mat_MPIAIJ *b; 3312 PetscErrorCode ierr; 3313 PetscInt i; 3314 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3315 3316 PetscFunctionBegin; 3317 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3318 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3319 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3320 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3321 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3322 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3323 3324 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3325 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3326 if (d_nnz) { 3327 for (i=0; i<B->rmap->n; i++) { 3328 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3329 } 3330 } 3331 if (o_nnz) { 3332 for (i=0; i<B->rmap->n; i++) { 3333 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3334 } 3335 } 3336 b = (Mat_MPIAIJ*)B->data; 3337 3338 if (!B->preallocated) { 3339 /* Explicitly create 2 MATSEQAIJ matrices. 
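       (Editor's sketch of the layout this sets up, consistent with the MatCreateAIJ() man page
       below: with m = B->rmap->n local rows, n = B->cmap->n local columns, and N = B->cmap->N
       global columns, b->A holds the m x n "diagonal" block, i.e. the columns owned by this
       process, while b->B holds the m x N "off-diagonal" block; after assembly the nonzero
       columns of b->B are compressed, and the global column number of compressed column i is
       recorded in b->garray[i].)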
*/ 3340 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3341 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3342 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3343 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3344 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3345 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3346 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3347 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3348 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3349 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3350 } 3351 3352 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3353 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3354 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3355 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3356 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3357 B->preallocated = PETSC_TRUE; 3358 PetscFunctionReturn(0); 3359 } 3360 EXTERN_C_END 3361 3362 #undef __FUNCT__ 3363 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3364 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3365 { 3366 Mat mat; 3367 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3368 PetscErrorCode ierr; 3369 3370 PetscFunctionBegin; 3371 *newmat = 0; 3372 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3373 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3374 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3375 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3376 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3377 a = (Mat_MPIAIJ*)mat->data; 3378 3379 mat->factortype = matin->factortype; 3380 mat->rmap->bs = matin->rmap->bs; 3381 mat->cmap->bs = matin->cmap->bs; 3382 mat->assembled = PETSC_TRUE; 3383 mat->insertmode = NOT_SET_VALUES; 3384 mat->preallocated = PETSC_TRUE; 3385 3386 a->size = oldmat->size; 3387 a->rank = oldmat->rank; 3388 a->donotstash = oldmat->donotstash; 3389 a->roworiented = oldmat->roworiented; 3390 a->rowindices = 0; 3391 a->rowvalues = 0; 3392 a->getrowactive = PETSC_FALSE; 3393 3394 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3395 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3396 3397 if (oldmat->colmap) { 3398 #if defined (PETSC_USE_CTABLE) 3399 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3400 #else 3401 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3402 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3403 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3404 #endif 3405 } else a->colmap = 0; 3406 if (oldmat->garray) { 3407 PetscInt len; 3408 len = oldmat->B->cmap->n; 3409 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3410 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3411 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3412 } else a->garray = 0; 3413 3414 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3415 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3416 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3417 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3418 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3419 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3420 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3421 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3422 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3423 *newmat = mat; 3424 PetscFunctionReturn(0); 3425 } 3426 3427 3428 3429 #undef __FUNCT__ 3430 #define __FUNCT__ "MatLoad_MPIAIJ" 3431 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3432 { 3433 PetscScalar *vals,*svals; 3434 MPI_Comm comm = ((PetscObject)viewer)->comm; 3435 PetscErrorCode ierr; 3436 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3437 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3438 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3439 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3440 PetscInt cend,cstart,n,*rowners,sizesset=1; 3441 int fd; 3442 PetscInt bs = 1; 3443 3444 PetscFunctionBegin; 3445 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3446 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3447 if (!rank) { 3448 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3449 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3450 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3451 } 3452 3453 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3454 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3455 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3456 3457 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3458 3459 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3460 M = header[1]; N = header[2]; 3461 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3462 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3463 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3464 3465 /* If global sizes are set, check if they are consistent with that given in the file */ 3466 if (sizesset) { 3467 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3468 } 3469 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3470 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3471 3472 /* determine ownership of all (block) rows */ 3473 if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3474 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3475 else m = newMat->rmap->n; /* Set by user */ 3476 3477 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3478 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3479 3480 /* First process needs enough room for process with most rows */ 3481 if (!rank) { 3482 mmax = rowners[1]; 3483 for (i=2; i<=size; i++) { 3484 
mmax = PetscMax(mmax, rowners[i]); 3485 } 3486 } else mmax = m; 3487 3488 rowners[0] = 0; 3489 for (i=2; i<=size; i++) { 3490 rowners[i] += rowners[i-1]; 3491 } 3492 rstart = rowners[rank]; 3493 rend = rowners[rank+1]; 3494 3495 /* distribute row lengths to all processors */ 3496 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3497 if (!rank) { 3498 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3499 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3500 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3501 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3502 for (j=0; j<m; j++) { 3503 procsnz[0] += ourlens[j]; 3504 } 3505 for (i=1; i<size; i++) { 3506 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3507 /* calculate the number of nonzeros on each processor */ 3508 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3509 procsnz[i] += rowlengths[j]; 3510 } 3511 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3512 } 3513 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3514 } else { 3515 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3516 } 3517 3518 if (!rank) { 3519 /* determine max buffer needed and allocate it */ 3520 maxnz = 0; 3521 for (i=0; i<size; i++) { 3522 maxnz = PetscMax(maxnz,procsnz[i]); 3523 } 3524 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3525 3526 /* read in my part of the matrix column indices */ 3527 nz = procsnz[0]; 3528 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3529 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3530 3531 /* read in every one elses and ship off */ 3532 for (i=1; i<size; i++) { 3533 nz = procsnz[i]; 3534 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3535 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3536 } 3537 ierr = PetscFree(cols);CHKERRQ(ierr); 3538 } else { 3539 /* determine buffer space needed for message */ 3540 nz = 0; 3541 for (i=0; i<m; i++) { 3542 nz += ourlens[i]; 3543 } 3544 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3545 3546 /* receive message of column indices*/ 3547 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3548 } 3549 3550 /* determine column ownership if matrix is not square */ 3551 if (N != M) { 3552 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3553 else n = newMat->cmap->n; 3554 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3555 cstart = cend - n; 3556 } else { 3557 cstart = rstart; 3558 cend = rend; 3559 n = cend - cstart; 3560 } 3561 3562 /* loop over local rows, determining number of off diagonal entries */ 3563 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3564 jj = 0; 3565 for (i=0; i<m; i++) { 3566 for (j=0; j<ourlens[i]; j++) { 3567 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3568 jj++; 3569 } 3570 } 3571 3572 for (i=0; i<m; i++) { 3573 ourlens[i] -= offlens[i]; 3574 } 3575 if (!sizesset) { 3576 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3577 } 3578 3579 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3580 3581 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3582 3583 for (i=0; i<m; i++) { 3584 ourlens[i] += offlens[i]; 3585 } 3586 3587 if (!rank) { 3588 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3589 3590 /* read in my part of the matrix numerical values */ 
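  /* Editor's sketch of the binary layout consumed by this routine (an assumption
     summarizing the reads above, not part of the original source): the file holds
     header[4] = {MAT_FILE_CLASSID,M,N,nz}, then the M row lengths, then all nz
     column indices, and finally the nz scalar values; at this point only the
     values remain to be read. */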
3591 nz = procsnz[0];
3592 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3593
3594 /* insert into matrix */
3595 jj = rstart;
3596 smycols = mycols;
3597 svals = vals;
3598 for (i=0; i<m; i++) {
3599 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3600 smycols += ourlens[i];
3601 svals += ourlens[i];
3602 jj++;
3603 }
3604
3605 /* read in other processors' parts and ship them out */
3606 for (i=1; i<size; i++) {
3607 nz = procsnz[i];
3608 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3609 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3610 }
3611 ierr = PetscFree(procsnz);CHKERRQ(ierr);
3612 } else {
3613 /* receive numeric values */
3614 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3615
3616 /* receive message of values */
3617 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3618
3619 /* insert into matrix */
3620 jj = rstart;
3621 smycols = mycols;
3622 svals = vals;
3623 for (i=0; i<m; i++) {
3624 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3625 smycols += ourlens[i];
3626 svals += ourlens[i];
3627 jj++;
3628 }
3629 }
3630 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3631 ierr = PetscFree(vals);CHKERRQ(ierr);
3632 ierr = PetscFree(mycols);CHKERRQ(ierr);
3633 ierr = PetscFree(rowners);CHKERRQ(ierr);
3634 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3635 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3636
3637 PetscFunctionReturn(0);
3638 }
3639
3640 #undef __FUNCT__
3641 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3642 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3643 {
3644 PetscErrorCode ierr;
3645 IS iscol_local;
3646 PetscInt csize;
3647
3648 PetscFunctionBegin;
3649 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3650 if (call == MAT_REUSE_MATRIX) {
3651 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3652 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3653 } else {
3654 PetscInt cbs;
3655 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3656 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3657 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3658 }
3659 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3660 if (call == MAT_INITIAL_MATRIX) {
3661 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3662 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3663 }
3664 PetscFunctionReturn(0);
3665 }
3666
3667 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3668 #undef __FUNCT__
3669 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3670 /*
3671 Not great since it makes two copies of the submatrix: first a SeqAIJ
3672 on each process, and then the end result by concatenating the local matrices.
3673 Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3674
3675 Note: This requires a sequential iscol containing all the indices.
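   A typical call sequence, as issued from MatGetSubMatrix_MPIAIJ() above (editor's sketch;
   mat, isrow, the gathered iscol_local, and csize as in that routine):
      MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,MAT_INITIAL_MATRIX,&sub);   first call, caches "SubMatrix" on sub
      MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,MAT_REUSE_MATRIX,&sub);     later calls reuse the cached copy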
3676 */ 3677 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3678 { 3679 PetscErrorCode ierr; 3680 PetscMPIInt rank,size; 3681 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3682 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3683 PetscBool allcolumns, colflag; 3684 Mat M,Mreuse; 3685 MatScalar *vwork,*aa; 3686 MPI_Comm comm = ((PetscObject)mat)->comm; 3687 Mat_SeqAIJ *aij; 3688 3689 3690 PetscFunctionBegin; 3691 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3692 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3693 3694 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3695 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3696 if (colflag && ncol == mat->cmap->N){ 3697 allcolumns = PETSC_TRUE; 3698 } else { 3699 allcolumns = PETSC_FALSE; 3700 } 3701 if (call == MAT_REUSE_MATRIX) { 3702 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3703 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3704 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3705 } else { 3706 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3707 } 3708 3709 /* 3710 m - number of local rows 3711 n - number of columns (same on all processors) 3712 rstart - first row in new global matrix generated 3713 */ 3714 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3715 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3716 if (call == MAT_INITIAL_MATRIX) { 3717 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3718 ii = aij->i; 3719 jj = aij->j; 3720 3721 /* 3722 Determine the number of non-zeros in the diagonal and off-diagonal 3723 portions of the matrix in order to do correct preallocation 3724 */ 3725 3726 /* first get start and end of "diagonal" columns */ 3727 if (csize == PETSC_DECIDE) { 3728 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3729 if (mglobal == n) { /* square matrix */ 3730 nlocal = m; 3731 } else { 3732 nlocal = n/size + ((n % size) > rank); 3733 } 3734 } else { 3735 nlocal = csize; 3736 } 3737 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3738 rstart = rend - nlocal; 3739 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3740 3741 /* next, compute all the lengths */ 3742 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3743 olens = dlens + m; 3744 for (i=0; i<m; i++) { 3745 jend = ii[i+1] - ii[i]; 3746 olen = 0; 3747 dlen = 0; 3748 for (j=0; j<jend; j++) { 3749 if (*jj < rstart || *jj >= rend) olen++; 3750 else dlen++; 3751 jj++; 3752 } 3753 olens[i] = olen; 3754 dlens[i] = dlen; 3755 } 3756 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3757 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3758 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3759 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3760 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3761 ierr = PetscFree(dlens);CHKERRQ(ierr); 3762 } else { 3763 PetscInt ml,nl; 3764 3765 M = *newmat; 3766 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3767 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3768 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3769 /* 3770 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly,
3771 rather than the slower MatSetValues().
3772 */
3773 M->was_assembled = PETSC_TRUE;
3774 M->assembled = PETSC_FALSE;
3775 }
3776 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3777 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3778 ii = aij->i;
3779 jj = aij->j;
3780 aa = aij->a;
3781 for (i=0; i<m; i++) {
3782 row = rstart + i;
3783 nz = ii[i+1] - ii[i];
3784 cwork = jj; jj += nz;
3785 vwork = aa; aa += nz;
3786 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3787 }
3788
3789 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3790 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3791 *newmat = M;
3792
3793 /* save submatrix used in processor for next request */
3794 if (call == MAT_INITIAL_MATRIX) {
3795 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3796 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3797 }
3798
3799 PetscFunctionReturn(0);
3800 }
3801
3802 EXTERN_C_BEGIN
3803 #undef __FUNCT__
3804 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3805 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3806 {
3807 PetscInt m,cstart, cend,j,nnz,i,d;
3808 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3809 const PetscInt *JJ;
3810 PetscScalar *values;
3811 PetscErrorCode ierr;
3812
3813 PetscFunctionBegin;
3814 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3815
3816 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3817 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3818 m = B->rmap->n;
3819 cstart = B->cmap->rstart;
3820 cend = B->cmap->rend;
3821 rstart = B->rmap->rstart;
3822
3823 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3824
3825 #if defined(PETSC_USE_DEBUG)
3826 for (i=0; i<m; i++) {
3827 nnz = Ii[i+1]- Ii[i];
3828 JJ = J + Ii[i];
3829 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3830 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3831 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3832 }
3833 #endif
3834
3835 for (i=0; i<m; i++) {
3836 nnz = Ii[i+1]- Ii[i];
3837 JJ = J + Ii[i];
3838 nnz_max = PetscMax(nnz_max,nnz);
3839 d = 0;
3840 for (j=0; j<nnz; j++) {
3841 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3842 }
3843 d_nnz[i] = d;
3844 o_nnz[i] = nnz - d;
3845 }
3846 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3847 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3848
3849 if (v) values = (PetscScalar*)v;
3850 else {
3851 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3852 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3853 }
3854
3855 for (i=0; i<m; i++) {
3856 ii = i + rstart;
3857 nnz = Ii[i+1]- Ii[i];
3858 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3859 }
3860 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3861 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3862
3863 if (!v) {
3864 ierr = PetscFree(values);CHKERRQ(ierr);
3865 }
3866 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3867 PetscFunctionReturn(0);
3868 }
3869 EXTERN_C_END
3870
3871 #undef __FUNCT__
3872 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3873 /*@
3874 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3875 (the default parallel PETSc format).
3876
3877 Collective on MPI_Comm
3878
3879 Input Parameters:
3880 + B - the matrix
3881 . i - the indices into j for the start of each local row (starts with zero)
3882 . j - the column indices for each local row (starts with zero)
3883 - v - optional values in the matrix
3884
3885 Level: developer
3886
3887 Notes:
3888 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3889 thus you CANNOT change the matrix entries by changing the values of v[] after you have
3890 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3891
3892 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3893
3894 The format which is used for the sparse matrix input is equivalent to a
3895 row-major ordering, i.e., for the following matrix, the input data expected is
3896 as shown:
3897
3898 1 0 0
3899 2 0 3 P0
3900 -------
3901 4 5 6 P1
3902
3903 Process0 [P0]: rows_owned=[0,1]
3904 i = {0,1,3} [size = nrow+1 = 2+1]
3905 j = {0,0,2} [size = nz = 3]
3906 v = {1,2,3} [size = nz = 3]
3907
3908 Process1 [P1]: rows_owned=[2]
3909 i = {0,3} [size = nrow+1 = 1+1]
3910 j = {0,1,2} [size = nz = 3]
3911 v = {4,5,6} [size = nz = 3]
3912
3913 .keywords: matrix, aij, compressed row, sparse, parallel
3914
3915 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3916 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3917 @*/
3918 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3919 {
3920 PetscErrorCode ierr;
3921
3922 PetscFunctionBegin;
3923 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3924 PetscFunctionReturn(0);
3925 }
3926
3927 #undef __FUNCT__
3928 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3929 /*@C
3930 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3931 (the default parallel PETSc format). For good matrix assembly performance
3932 the user should preallocate the matrix storage by setting the parameters
3933 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3934 performance can be increased by more than a factor of 50.
3935
3936 Collective on MPI_Comm
3937
3938 Input Parameters:
3939 + B - the matrix
3940 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3941 (same value is used for all local rows)
3942 . d_nnz - array containing the number of nonzeros in the various rows of the
3943 DIAGONAL portion of the local submatrix (possibly different for each row)
3944 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3945 The size of this array is equal to the number of local rows, i.e., 'm'.
3946 For matrices that will be factored, you must leave room for (and set)
3947 the diagonal entry even if it is zero.
3948 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3949 submatrix (same value is used for all local rows).
3950 - o_nnz - array containing the number of nonzeros in the various rows of the
3951 OFF-DIAGONAL portion of the local submatrix (possibly different for
3952 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3953 structure. The size of this array is equal to the number
3954 of local rows, i.e., 'm'.
3955
3956 If the *_nnz parameter is given then the *_nz parameter is ignored.
3957
3958 The AIJ format (also called the Yale sparse matrix format or
3959 compressed row storage (CSR)) is fully compatible with standard Fortran 77
3960 storage. The stored row and column indices begin with zero.
3961 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3962
3963 The parallel matrix is partitioned such that the first m0 rows belong to
3964 process 0, the next m1 rows belong to process 1, the next m2 rows belong
3965 to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3966
3967 The DIAGONAL portion of the local submatrix of a processor can be defined
3968 as the submatrix which is obtained by extracting the part corresponding to
3969 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3970 first row that belongs to the processor, r2 is the last row belonging to
3971 this processor, and c1-c2 is the range of indices of the local part of a
3972 vector suitable for applying the matrix to. This is an mxn matrix. In the
3973 common case of a square matrix, the row and column ranges are the same and
3974 the DIAGONAL part is also square. The remaining portion of the local
3975 submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3976
3977 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3978
3979 You can call MatGetInfo() to get information on how effective the preallocation was;
3980 for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
3981 You can also run with the option -info and look for messages with the string
3982 malloc in them to see if additional memory allocation was needed.
3983
3984 Example usage:
3985
3986 Consider the following 8x8 matrix with 34 non-zero values, that is
3987 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3988 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3989 as follows:
3990
3991 .vb
3992 1 2 0 | 0 3 0 | 0 4
3993 Proc0 0 5 6 | 7 0 0 | 8 0
3994 9 0 10 | 11 0 0 | 12 0
3995 -------------------------------------
3996 13 0 14 | 15 16 17 | 0 0
3997 Proc1 0 18 0 | 19 20 21 | 0 0
3998 0 0 0 | 22 23 0 | 24 0
3999 -------------------------------------
4000 Proc2 25 26 27 | 0 0 28 | 29 0
4001 30 0 0 | 31 32 33 | 0 34
4002 .ve
4003
4004 This can be represented as a collection of submatrices as:
4005
4006 .vb
4007 A B C
4008 D E F
4009 G H I
4010 .ve
4011
4012 Where the submatrices A,B,C are owned by proc0, D,E,F are
4013 owned by proc1, G,H,I are owned by proc2.
4014
4015 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4016 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4017 The 'M','N' parameters are 8,8, and have the same values on all procs.
4018
4019 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4020 submatrices [A], [E], [I] respectively.
The OFF-DIAGONAL submatrices
4021 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4022 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4023 part as SeqAIJ matrices. E.g., proc1 will store [E] as a SeqAIJ
4024 matrix, and [DF] as another SeqAIJ matrix.
4025
4026 When d_nz, o_nz parameters are specified, d_nz storage elements are
4027 allocated for every row of the local diagonal submatrix, and o_nz
4028 storage locations are allocated for every row of the OFF-DIAGONAL submat.
4029 One way to choose d_nz and o_nz is to use the max nonzeros per local
4030 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4031 In this case, the values of d_nz,o_nz are:
4032 .vb
4033 proc0 : d_nz = 2, o_nz = 2
4034 proc1 : d_nz = 3, o_nz = 2
4035 proc2 : d_nz = 1, o_nz = 4
4036 .ve
4037 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4038 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4039 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4040 34 values.
4041
4042 When d_nnz, o_nnz parameters are specified, the storage is specified
4043 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4044 In the above case the values for d_nnz,o_nnz are:
4045 .vb
4046 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4047 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4048 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4049 .ve
4050 Here the space allocated is the sum of all the above values, i.e., 34, and
4051 hence pre-allocation is perfect.
4052
4053 Level: intermediate
4054
4055 .keywords: matrix, aij, compressed row, sparse, parallel
4056
4057 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4058 MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4059 @*/
4060 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4061 {
4062 PetscErrorCode ierr;
4063
4064 PetscFunctionBegin;
4065 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4066 PetscValidType(B,1);
4067 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4068 PetscFunctionReturn(0);
4069 }
4070
4071 #undef __FUNCT__
4072 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4073 /*@
4074 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain in standard
4075 CSR format the local rows.
4076
4077 Collective on MPI_Comm
4078
4079 Input Parameters:
4080 + comm - MPI communicator
4081 . m - number of local rows (Cannot be PETSC_DECIDE)
4082 . n - This value should be the same as the local size used in creating the
4083 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4084 calculated if N is given) For square matrices n is almost always m.
4085 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4086 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4087 . i - row indices
4088 . j - column indices
4089 - a - matrix values
4090
4091 Output Parameter:
4092 . mat - the matrix
4093
4094 Level: intermediate
4095
4096 Notes:
4097 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4098 thus you CANNOT change the matrix entries by changing the values of a[] after you have
4099 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
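   A minimal call sketch (editor's addition; comm and the local CSR triplet
   i, j, a are assumed to be set up as described below):
.vb
      Mat A;
      ierr = MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
      /* ... use A; since the arrays were copied, i, j, and a may be freed now ... */
      ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve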
4100 4101 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4102 4103 The format which is used for the sparse matrix input, is equivalent to a 4104 row-major ordering.. i.e for the following matrix, the input data expected is 4105 as shown: 4106 4107 1 0 0 4108 2 0 3 P0 4109 ------- 4110 4 5 6 P1 4111 4112 Process0 [P0]: rows_owned=[0,1] 4113 i = {0,1,3} [size = nrow+1 = 2+1] 4114 j = {0,0,2} [size = nz = 6] 4115 v = {1,2,3} [size = nz = 6] 4116 4117 Process1 [P1]: rows_owned=[2] 4118 i = {0,3} [size = nrow+1 = 1+1] 4119 j = {0,1,2} [size = nz = 6] 4120 v = {4,5,6} [size = nz = 6] 4121 4122 .keywords: matrix, aij, compressed row, sparse, parallel 4123 4124 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4125 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4126 @*/ 4127 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4128 { 4129 PetscErrorCode ierr; 4130 4131 PetscFunctionBegin; 4132 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4133 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4134 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4135 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4136 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4137 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4138 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4139 PetscFunctionReturn(0); 4140 } 4141 4142 #undef __FUNCT__ 4143 #define __FUNCT__ "MatCreateAIJ" 4144 /*@C 4145 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4146 (the default parallel PETSc format). For good matrix assembly performance 4147 the user should preallocate the matrix storage by setting the parameters 4148 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4149 performance can be increased by more than a factor of 50. 4150 4151 Collective on MPI_Comm 4152 4153 Input Parameters: 4154 + comm - MPI communicator 4155 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4156 This value should be the same as the local size used in creating the 4157 y vector for the matrix-vector product y = Ax. 4158 . n - This value should be the same as the local size used in creating the 4159 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4160 calculated if N is given) For square matrices n is almost always m. 4161 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4162 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4163 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4164 (same value is used for all local rows) 4165 . d_nnz - array containing the number of nonzeros in the various rows of the 4166 DIAGONAL portion of the local submatrix (possibly different for each row) 4167 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4168 The size of this array is equal to the number of local rows, i.e 'm'. 4169 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4170 submatrix (same value is used for all local rows). 
4171 - o_nnz - array containing the number of nonzeros in the various rows of the
4172 OFF-DIAGONAL portion of the local submatrix (possibly different for
4173 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4174 structure. The size of this array is equal to the number
4175 of local rows, i.e., 'm'.
4176
4177 Output Parameter:
4178 . A - the matrix
4179
4180 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4181 MatXXXXSetPreallocation() paradigm instead of this routine directly.
4182 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4183
4184 Notes:
4185 If the *_nnz parameter is given then the *_nz parameter is ignored
4186
4187 m,n,M,N parameters specify the size of the matrix, and its partitioning across
4188 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4189 storage requirements for this matrix.
4190
4191 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4192 processor then it must be used on all processors that share the object for
4193 that argument.
4194
4195 The user MUST specify either the local or global matrix dimensions
4196 (possibly both).
4197
4198 The parallel matrix is partitioned across processors such that the
4199 first m0 rows belong to process 0, the next m1 rows belong to
4200 process 1, the next m2 rows belong to process 2, etc., where
4201 m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4202 values corresponding to an [m x N] submatrix.
4203
4204 The columns are logically partitioned with the n0 columns belonging
4205 to the 0th partition, the next n1 columns belonging to the next
4206 partition, etc., where n0,n1,n2... are the input parameter 'n'.
4207
4208 The DIAGONAL portion of the local submatrix on any given processor
4209 is the submatrix corresponding to the rows and columns m,n
4210 corresponding to the given processor, i.e., the diagonal matrix on
4211 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4212 etc. The remaining portion of the local submatrix [m x (N-n)]
4213 constitutes the OFF-DIAGONAL portion. The example below better
4214 illustrates this concept.
4215
4216 For a square global matrix we define each processor's diagonal portion
4217 to be its local rows and the corresponding columns (a square submatrix);
4218 each processor's off-diagonal portion encompasses the remainder of the
4219 local matrix (a rectangular submatrix).
4220
4221 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4222
4223 When calling this routine with a single process communicator, a matrix of
4224 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4225 type of communicator, use the construction mechanism:
4226 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4227
4228 By default, this format uses inodes (identical nodes) when possible.
4229 We search for consecutive rows with the same nonzero structure, thereby
4230 reusing matrix information to achieve increased efficiency.
4231
4232 Options Database Keys:
4233 + -mat_no_inode - Do not use inodes
4234 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4235 - -mat_aij_oneindex - Internally use indexing starting at 1
4236 rather than 0. Note that when calling MatSetValues(),
4237 the user still MUST index entries starting at 0!
4238
4239
4240 Example usage:
4241
4242 Consider the following 8x8 matrix with 34 non-zero values, that is
4243 assembled across 3 processors.
Let's assume that proc0 owns 3 rows,
4244 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4245 as follows:
4246
4247 .vb
4248 1 2 0 | 0 3 0 | 0 4
4249 Proc0 0 5 6 | 7 0 0 | 8 0
4250 9 0 10 | 11 0 0 | 12 0
4251 -------------------------------------
4252 13 0 14 | 15 16 17 | 0 0
4253 Proc1 0 18 0 | 19 20 21 | 0 0
4254 0 0 0 | 22 23 0 | 24 0
4255 -------------------------------------
4256 Proc2 25 26 27 | 0 0 28 | 29 0
4257 30 0 0 | 31 32 33 | 0 34
4258 .ve
4259
4260 This can be represented as a collection of submatrices as:
4261
4262 .vb
4263 A B C
4264 D E F
4265 G H I
4266 .ve
4267
4268 Where the submatrices A,B,C are owned by proc0, D,E,F are
4269 owned by proc1, G,H,I are owned by proc2.
4270
4271 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4272 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4273 The 'M','N' parameters are 8,8, and have the same values on all procs.
4274
4275 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4276 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4277 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4278 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4279 part as SeqAIJ matrices. E.g., proc1 will store [E] as a SeqAIJ
4280 matrix, and [DF] as another SeqAIJ matrix.
4281
4282 When d_nz, o_nz parameters are specified, d_nz storage elements are
4283 allocated for every row of the local diagonal submatrix, and o_nz
4284 storage locations are allocated for every row of the OFF-DIAGONAL submat.
4285 One way to choose d_nz and o_nz is to use the max nonzeros per local
4286 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4287 In this case, the values of d_nz,o_nz are:
4288 .vb
4289 proc0 : d_nz = 2, o_nz = 2
4290 proc1 : d_nz = 3, o_nz = 2
4291 proc2 : d_nz = 1, o_nz = 4
4292 .ve
4293 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4294 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4295 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4296 34 values.
4297
4298 When d_nnz, o_nnz parameters are specified, the storage is specified
4299 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4300 In the above case the values for d_nnz,o_nnz are:
4301 .vb
4302 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4303 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4304 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4305 .ve
4306 Here the space allocated is the sum of all the above values, i.e., 34, and
4307 hence pre-allocation is perfect.
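   A call sketch matching the example above (editor's addition; comm is assumed;
   shown for proc0, which owns 3 rows of the 8x8 matrix and uses the d_nnz/o_nnz
   values just listed):
.vb
      Mat      A;
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
      ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
      /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use A ... */
      ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve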
4308 4309 Level: intermediate 4310 4311 .keywords: matrix, aij, compressed row, sparse, parallel 4312 4313 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4314 MPIAIJ, MatCreateMPIAIJWithArrays() 4315 @*/ 4316 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4317 { 4318 PetscErrorCode ierr; 4319 PetscMPIInt size; 4320 4321 PetscFunctionBegin; 4322 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4323 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4324 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4325 if (size > 1) { 4326 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4327 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4328 } else { 4329 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4330 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4331 } 4332 PetscFunctionReturn(0); 4333 } 4334 4335 #undef __FUNCT__ 4336 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4337 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4338 { 4339 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4340 4341 PetscFunctionBegin; 4342 *Ad = a->A; 4343 *Ao = a->B; 4344 *colmap = a->garray; 4345 PetscFunctionReturn(0); 4346 } 4347 4348 #undef __FUNCT__ 4349 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4350 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4351 { 4352 PetscErrorCode ierr; 4353 PetscInt i; 4354 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4355 4356 PetscFunctionBegin; 4357 if (coloring->ctype == IS_COLORING_GLOBAL) { 4358 ISColoringValue *allcolors,*colors; 4359 ISColoring ocoloring; 4360 4361 /* set coloring for diagonal portion */ 4362 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4363 4364 /* set coloring for off-diagonal portion */ 4365 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4366 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4367 for (i=0; i<a->B->cmap->n; i++) { 4368 colors[i] = allcolors[a->garray[i]]; 4369 } 4370 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4371 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4372 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4373 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4374 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4375 ISColoringValue *colors; 4376 PetscInt *larray; 4377 ISColoring ocoloring; 4378 4379 /* set coloring for diagonal portion */ 4380 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4381 for (i=0; i<a->A->cmap->n; i++) { 4382 larray[i] = i + A->cmap->rstart; 4383 } 4384 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4385 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4386 for (i=0; i<a->A->cmap->n; i++) { 4387 colors[i] = coloring->colors[larray[i]]; 4388 } 4389 ierr = PetscFree(larray);CHKERRQ(ierr); 4390 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4391 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4392 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4393 4394 /* set coloring for off-diagonal portion */ 4395 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4396 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4397 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4398 for (i=0; i<a->B->cmap->n; i++) { 4399 colors[i] = coloring->colors[larray[i]]; 4400 } 4401 ierr = PetscFree(larray);CHKERRQ(ierr); 4402 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4403 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4404 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4405 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4406 4407 PetscFunctionReturn(0); 4408 } 4409 4410 #if defined(PETSC_HAVE_ADIC) 4411 #undef __FUNCT__ 4412 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4413 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4414 { 4415 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4416 PetscErrorCode ierr; 4417 4418 PetscFunctionBegin; 4419 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4420 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4421 PetscFunctionReturn(0); 4422 } 4423 #endif 4424 4425 #undef __FUNCT__ 4426 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4427 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4428 { 4429 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4430 PetscErrorCode ierr; 4431 4432 PetscFunctionBegin; 4433 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4434 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4435 PetscFunctionReturn(0); 4436 } 4437 4438 #undef __FUNCT__ 4439 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4440 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4441 { 4442 PetscErrorCode ierr; 4443 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4444 PetscInt *indx; 4445 4446 PetscFunctionBegin; 4447 /* This routine will ONLY return MPIAIJ type matrix */ 4448 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4449 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4450 if (n == PETSC_DECIDE){ 4451 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4452 } 4453 /* Check sum(n) = N */ 4454 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4455 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4456 4457 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4458 rstart -= m; 4459 4460 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4461 for (i=0;i<m;i++) { 4462 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4463 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4464 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4465 } 4466 4467 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4468 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4469 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4470 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4471 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4472 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4473 PetscFunctionReturn(0); 4474 } 4475 4476 #undef __FUNCT__ 4477 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4478 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4479 { 4480 PetscErrorCode ierr; 4481 PetscInt 
m,N,i,rstart,nnz,Ii; 4482 PetscInt *indx; 4483 PetscScalar *values; 4484 4485 PetscFunctionBegin; 4486 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4487 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4488 for (i=0;i<m;i++) { 4489 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4490 Ii = i + rstart; 4491 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4492 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4493 } 4494 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4495 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4496 PetscFunctionReturn(0); 4497 } 4498 4499 #undef __FUNCT__ 4500 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4501 /*@ 4502 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4503 matrices from each processor 4504 4505 Collective on MPI_Comm 4506 4507 Input Parameters: 4508 + comm - the communicator the parallel matrix will live on 4509 . inmat - the input sequential matrix (one per process) 4510 . n - number of local columns (or PETSC_DECIDE) 4511 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4512 4513 Output Parameter: 4514 . outmat - the parallel matrix generated 4515 4516 Level: advanced 4517 4518 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4519 4520 @*/ 4521 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4522 { 4523 PetscErrorCode ierr; 4524 4525 PetscFunctionBegin; 4526 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4527 if (scall == MAT_INITIAL_MATRIX){ 4528 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4529 } 4530 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4531 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4532 PetscFunctionReturn(0); 4533 } 4534 4535 #undef __FUNCT__ 4536 #define __FUNCT__ "MatFileSplit" 4537 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4538 { 4539 PetscErrorCode ierr; 4540 PetscMPIInt rank; 4541 PetscInt m,N,i,rstart,nnz; 4542 size_t len; 4543 const PetscInt *indx; 4544 PetscViewer out; 4545 char *name; 4546 Mat B; 4547 const PetscScalar *values; 4548 4549 PetscFunctionBegin; 4550 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4551 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4552 /* Should this be the type of the diagonal block of A?
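(Currently B is always created as MATSEQAIJ just below.)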
*/ 4553 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4554 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4555 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4556 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4557 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4558 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4559 for (i=0;i<m;i++) { 4560 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4561 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4562 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4563 } 4564 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4565 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4566 4567 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4568 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4569 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4570 sprintf(name,"%s.%d",outfile,rank); 4571 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4572 ierr = PetscFree(name);CHKERRQ(ierr); 4573 ierr = MatView(B,out);CHKERRQ(ierr); 4574 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4575 ierr = MatDestroy(&B);CHKERRQ(ierr); 4576 PetscFunctionReturn(0); 4577 } 4578 4579 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4580 #undef __FUNCT__ 4581 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4582 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4583 { 4584 PetscErrorCode ierr; 4585 Mat_Merge_SeqsToMPI *merge; 4586 PetscContainer container; 4587 4588 PetscFunctionBegin; 4589 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4590 if (container) { 4591 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4592 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4593 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4594 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4595 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4596 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4597 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4598 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4599 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4600 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4601 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4602 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4603 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4604 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4605 ierr = PetscFree(merge);CHKERRQ(ierr); 4606 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4607 } 4608 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4609 PetscFunctionReturn(0); 4610 } 4611 4612 #include <../src/mat/utils/freespace.h> 4613 #include <petscbt.h> 4614 4615 #undef __FUNCT__ 4616 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4617 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4618 { 4619 PetscErrorCode ierr; 4620 MPI_Comm comm=((PetscObject)mpimat)->comm; 4621 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4622 PetscMPIInt size,rank,taga,*len_s; 4623 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4624 PetscInt proc,m; 4625 PetscInt **buf_ri,**buf_rj; 4626 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4627 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4628 MPI_Request *s_waits,*r_waits; 4629 MPI_Status *status; 4630 MatScalar *aa=a->a; 4631 MatScalar **abuf_r,*ba_i; 4632 Mat_Merge_SeqsToMPI *merge; 4633 PetscContainer container; 4634 4635 PetscFunctionBegin; 4636 ierr = 
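/* numeric phase of the sequential-to-MPI merge: ship each process's nonzero values to their owning ranks and accumulate them into mpimat */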
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4637 4638 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4639 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4640 4641 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4642 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4643 4644 bi = merge->bi; 4645 bj = merge->bj; 4646 buf_ri = merge->buf_ri; 4647 buf_rj = merge->buf_rj; 4648 4649 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4650 owners = merge->rowmap->range; 4651 len_s = merge->len_s; 4652 4653 /* send and recv matrix values */ 4654 /*-----------------------------*/ 4655 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4656 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4657 4658 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4659 for (proc=0,k=0; proc<size; proc++){ 4660 if (!len_s[proc]) continue; 4661 i = owners[proc]; 4662 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4663 k++; 4664 } 4665 4666 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4667 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4668 ierr = PetscFree(status);CHKERRQ(ierr); 4669 4670 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4671 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4672 4673 /* insert mat values of mpimat */ 4674 /*----------------------------*/ 4675 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4676 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4677 4678 for (k=0; k<merge->nrecv; k++){ 4679 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4680 nrows = *(buf_ri_k[k]); 4681 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4682 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4683 } 4684 4685 /* set values of ba */ 4686 m = merge->rowmap->n; 4687 for (i=0; i<m; i++) { 4688 arow = owners[rank] + i; 4689 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4690 bnzi = bi[i+1] - bi[i]; 4691 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4692 4693 /* add local non-zero vals of this proc's seqmat into ba */ 4694 anzi = ai[arow+1] - ai[arow]; 4695 aj = a->j + ai[arow]; 4696 aa = a->a + ai[arow]; 4697 nextaj = 0; 4698 for (j=0; nextaj<anzi; j++){ 4699 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4700 ba_i[j] += aa[nextaj++]; 4701 } 4702 } 4703 4704 /* add received vals into ba */ 4705 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4706 /* i-th row */ 4707 if (i == *nextrow[k]) { 4708 anzi = *(nextai[k]+1) - *nextai[k]; 4709 aj = buf_rj[k] + *(nextai[k]); 4710 aa = abuf_r[k] + *(nextai[k]); 4711 nextaj = 0; 4712 for (j=0; nextaj<anzi; j++){ 4713 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4714 ba_i[j] += aa[nextaj++]; 4715 } 4716 } 4717 nextrow[k]++; nextai[k]++; 4718 } 4719 } 4720 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4721 } 4722 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4723 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4724 4725 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4726 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4727 ierr = 
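/* all local rows have been inserted; release the accumulation buffer and the per-message cursors */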
PetscFree(ba_i);CHKERRQ(ierr); 4728 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4729 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4730 PetscFunctionReturn(0); 4731 } 4732 4733 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4734 4735 #undef __FUNCT__ 4736 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4737 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4738 { 4739 PetscErrorCode ierr; 4740 Mat B_mpi; 4741 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4742 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4743 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4744 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4745 PetscInt len,proc,*dnz,*onz,bs,cbs; 4746 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4747 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4748 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4749 MPI_Status *status; 4750 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4751 PetscBT lnkbt; 4752 Mat_Merge_SeqsToMPI *merge; 4753 PetscContainer container; 4754 4755 PetscFunctionBegin; 4756 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4757 4758 /* make sure it is a PETSc comm */ 4759 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4760 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4761 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4762 4763 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4764 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4765 4766 /* determine row ownership */ 4767 /*---------------------------------------------------------*/ 4768 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4769 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4770 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4771 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4772 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4773 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4774 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4775 4776 m = merge->rowmap->n; 4777 owners = merge->rowmap->range; 4778 4779 /* determine the number of messages to send, their lengths */ 4780 /*---------------------------------------------------------*/ 4781 len_s = merge->len_s; 4782 4783 len = 0; /* length of buf_si[] */ 4784 merge->nsend = 0; 4785 for (proc=0; proc<size; proc++){ 4786 len_si[proc] = 0; 4787 if (proc == rank){ 4788 len_s[proc] = 0; 4789 } else { 4790 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4791 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4792 } 4793 if (len_s[proc]) { 4794 merge->nsend++; 4795 nrows = 0; 4796 for (i=owners[proc]; i<owners[proc+1]; i++){ 4797 if (ai[i+1] > ai[i]) nrows++; 4798 } 4799 len_si[proc] = 2*(nrows+1); 4800 len += len_si[proc]; 4801 } 4802 } 4803 4804 /* determine the number and length of messages to receive for ij-structure */ 4805 /*-------------------------------------------------------------------------*/ 4806 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4807 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4808 4809 /* post the Irecv of j-structure */ 4810 /*-------------------------------*/ 4811 ierr = 
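/* take a fresh tag so the j-structure messages cannot be confused with other traffic on this communicator */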
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4812 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4813 4814 /* post the Isend of j-structure */ 4815 /*--------------------------------*/ 4816 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4817 4818 for (proc=0, k=0; proc<size; proc++){ 4819 if (!len_s[proc]) continue; 4820 i = owners[proc]; 4821 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4822 k++; 4823 } 4824 4825 /* receives and sends of j-structure are complete */ 4826 /*------------------------------------------------*/ 4827 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4828 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4829 4830 /* send and recv i-structure */ 4831 /*---------------------------*/ 4832 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4833 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4834 4835 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4836 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4837 for (proc=0,k=0; proc<size; proc++){ 4838 if (!len_s[proc]) continue; 4839 /* form outgoing message for i-structure: 4840 buf_si[0]: nrows to be sent 4841 [1:nrows]: row index (global) 4842 [nrows+1:2*nrows+1]: i-structure index 4843 */ 4844 /*-------------------------------------------*/ 4845 nrows = len_si[proc]/2 - 1; 4846 buf_si_i = buf_si + nrows+1; 4847 buf_si[0] = nrows; 4848 buf_si_i[0] = 0; 4849 nrows = 0; 4850 for (i=owners[proc]; i<owners[proc+1]; i++){ 4851 anzi = ai[i+1] - ai[i]; 4852 if (anzi) { 4853 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4854 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4855 nrows++; 4856 } 4857 } 4858 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4859 k++; 4860 buf_si += len_si[proc]; 4861 } 4862 4863 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4864 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4865 4866 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4867 for (i=0; i<merge->nrecv; i++){ 4868 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4869 } 4870 4871 ierr = PetscFree(len_si);CHKERRQ(ierr); 4872 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4873 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4874 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4875 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4876 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4877 ierr = PetscFree(status);CHKERRQ(ierr); 4878 4879 /* compute a local seq matrix in each processor */ 4880 /*----------------------------------------------*/ 4881 /* allocate bi array and free space for accumulating nonzero column info */ 4882 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4883 bi[0] = 0; 4884 4885 /* create and initialize a linked list */ 4886 nlnk = N+1; 4887 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4888 4889 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4890 len = ai[owners[rank+1]] - ai[owners[rank]]; 4891 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4892 current_space = free_space; 4893 4894 /* determine symbolic info for 
each local row */ 4895 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4896 4897 for (k=0; k<merge->nrecv; k++){ 4898 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4899 nrows = *buf_ri_k[k]; 4900 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4901 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4902 } 4903 4904 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4905 len = 0; 4906 for (i=0;i<m;i++) { 4907 bnzi = 0; 4908 /* add local non-zero cols of this proc's seqmat into lnk */ 4909 arow = owners[rank] + i; 4910 anzi = ai[arow+1] - ai[arow]; 4911 aj = a->j + ai[arow]; 4912 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4913 bnzi += nlnk; 4914 /* add received col data into lnk */ 4915 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4916 if (i == *nextrow[k]) { /* i-th row */ 4917 anzi = *(nextai[k]+1) - *nextai[k]; 4918 aj = buf_rj[k] + *nextai[k]; 4919 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4920 bnzi += nlnk; 4921 nextrow[k]++; nextai[k]++; 4922 } 4923 } 4924 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4925 4926 /* if free space is not available, make more free space */ 4927 if (current_space->local_remaining<bnzi) { 4928 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4929 nspacedouble++; 4930 } 4931 /* copy data into free space, then initialize lnk */ 4932 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4933 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4934 4935 current_space->array += bnzi; 4936 current_space->local_used += bnzi; 4937 current_space->local_remaining -= bnzi; 4938 4939 bi[i+1] = bi[i] + bnzi; 4940 } 4941 4942 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4943 4944 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4945 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4946 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4947 4948 /* create symbolic parallel matrix B_mpi */ 4949 /*---------------------------------------*/ 4950 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4951 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4952 if (n==PETSC_DECIDE) { 4953 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4954 } else { 4955 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4956 } 4957 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4958 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4959 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4960 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4961 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4962 4963 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4964 B_mpi->assembled = PETSC_FALSE; 4965 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4966 merge->bi = bi; 4967 merge->bj = bj; 4968 merge->buf_ri = buf_ri; 4969 merge->buf_rj = buf_rj; 4970 merge->coi = PETSC_NULL; 4971 merge->coj = PETSC_NULL; 4972 merge->owners_co = PETSC_NULL; 4973 4974 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4975 4976 /* attach the supporting struct to B_mpi for reuse */ 4977 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4978 ierr = 
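/* stash the merge context in a container on B_mpi so MatCreateMPIAIJSumSeqAIJNumeric() can retrieve it */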
PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4979 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4980 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4981 *mpimat = B_mpi; 4982 4983 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4984 PetscFunctionReturn(0); 4985 } 4986 4987 #undef __FUNCT__ 4988 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4989 /*@C 4990 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential 4991 matrices from each processor 4992 4993 Collective on MPI_Comm 4994 4995 Input Parameters: 4996 + comm - the communicator the parallel matrix will live on 4997 . seqmat - the input sequential matrix (one per process) 4998 . m - number of local rows (or PETSC_DECIDE) 4999 . n - number of local columns (or PETSC_DECIDE) 5000 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5001 5002 Output Parameter: 5003 . mpimat - the parallel matrix generated 5004 5005 Level: advanced 5006 5007 Notes: 5008 The dimensions of the sequential matrix in each processor MUST be the same. 5009 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 5010 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5011 @*/ 5012 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5013 { 5014 PetscErrorCode ierr; 5015 PetscMPIInt size; 5016 5017 PetscFunctionBegin; 5018 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5019 if (size == 1){ 5020 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5021 if (scall == MAT_INITIAL_MATRIX){ 5022 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5023 } else { 5024 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5025 } 5026 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5027 PetscFunctionReturn(0); 5028 } 5029 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5030 if (scall == MAT_INITIAL_MATRIX){ 5031 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5032 } 5033 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5034 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5035 PetscFunctionReturn(0); 5036 } 5037 5038 #undef __FUNCT__ 5039 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5040 /*@ 5041 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all of its local rows and putting them into a sequential matrix with 5042 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5043 with MatGetSize() 5044 5045 Not Collective 5046 5047 Input Parameters: 5048 + A - the matrix 5049 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5050 5051 Output Parameter: 5052 .
A_loc - the local sequential matrix generated 5053 5054 Level: developer 5055 5056 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5057 5058 @*/ 5059 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5060 { 5061 PetscErrorCode ierr; 5062 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5063 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5064 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5065 MatScalar *aa=a->a,*ba=b->a,*cam; 5066 PetscScalar *ca; 5067 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5068 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5069 PetscBool match; 5070 5071 PetscFunctionBegin; 5072 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5073 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5074 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5075 if (scall == MAT_INITIAL_MATRIX){ 5076 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5077 ci[0] = 0; 5078 for (i=0; i<am; i++){ 5079 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5080 } 5081 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5082 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5083 k = 0; 5084 for (i=0; i<am; i++) { 5085 ncols_o = bi[i+1] - bi[i]; 5086 ncols_d = ai[i+1] - ai[i]; 5087 /* off-diagonal portion of A */ 5088 for (jo=0; jo<ncols_o; jo++) { 5089 col = cmap[*bj]; 5090 if (col >= cstart) break; 5091 cj[k] = col; bj++; 5092 ca[k++] = *ba++; 5093 } 5094 /* diagonal portion of A */ 5095 for (j=0; j<ncols_d; j++) { 5096 cj[k] = cstart + *aj++; 5097 ca[k++] = *aa++; 5098 } 5099 /* off-diagonal portion of A */ 5100 for (j=jo; j<ncols_o; j++) { 5101 cj[k] = cmap[*bj++]; 5102 ca[k++] = *ba++; 5103 } 5104 } 5105 /* put together the new matrix */ 5106 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5107 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5108 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5109 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5110 mat->free_a = PETSC_TRUE; 5111 mat->free_ij = PETSC_TRUE; 5112 mat->nonew = 0; 5113 } else if (scall == MAT_REUSE_MATRIX){ 5114 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5115 ci = mat->i; cj = mat->j; cam = mat->a; 5116 for (i=0; i<am; i++) { 5117 /* off-diagonal portion of A */ 5118 ncols_o = bi[i+1] - bi[i]; 5119 for (jo=0; jo<ncols_o; jo++) { 5120 col = cmap[*bj]; 5121 if (col >= cstart) break; 5122 *cam++ = *ba++; bj++; 5123 } 5124 /* diagonal portion of A */ 5125 ncols_d = ai[i+1] - ai[i]; 5126 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5127 /* off-diagonal portion of A */ 5128 for (j=jo; j<ncols_o; j++) { 5129 *cam++ = *ba++; bj++; 5130 } 5131 } 5132 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5133 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5134 PetscFunctionReturn(0); 5135 } 5136 5137 #undef __FUNCT__ 5138 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5139 /*@C 5140 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5141 5142 Not Collective 5143 5144 Input Parameters: 5145 + A - the matrix 5146 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5147 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5148 5149 Output Parameter: 5150 . 
A_loc - the local sequential matrix generated 5151 5152 Level: developer 5153 5154 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5155 5156 @*/ 5157 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5158 { 5159 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5160 PetscErrorCode ierr; 5161 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5162 IS isrowa,iscola; 5163 Mat *aloc; 5164 PetscBool match; 5165 5166 PetscFunctionBegin; 5167 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5168 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5169 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5170 if (!row){ 5171 start = A->rmap->rstart; end = A->rmap->rend; 5172 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5173 } else { 5174 isrowa = *row; 5175 } 5176 if (!col){ 5177 start = A->cmap->rstart; 5178 cmap = a->garray; 5179 nzA = a->A->cmap->n; 5180 nzB = a->B->cmap->n; 5181 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5182 ncols = 0; 5183 for (i=0; i<nzB; i++) { 5184 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5185 else break; 5186 } 5187 imark = i; 5188 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5189 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5190 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5191 } else { 5192 iscola = *col; 5193 } 5194 if (scall != MAT_INITIAL_MATRIX){ 5195 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5196 aloc[0] = *A_loc; 5197 } 5198 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5199 *A_loc = aloc[0]; 5200 ierr = PetscFree(aloc);CHKERRQ(ierr); 5201 if (!row){ 5202 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5203 } 5204 if (!col){ 5205 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5206 } 5207 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5208 PetscFunctionReturn(0); 5209 } 5210 5211 #undef __FUNCT__ 5212 #define __FUNCT__ "MatGetBrowsOfAcols" 5213 /*@C 5214 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5215 5216 Collective on Mat 5217 5218 Input Parameters: 5219 + A,B - the matrices in mpiaij format 5220 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5221 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5222 5223 Output Parameter: 5224 + rowb, colb - index sets of rows and columns of B to extract 5225 - B_seq - the sequential matrix generated 5226 5227 Level: developer 5228 5229 @*/ 5230 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5231 { 5232 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5233 PetscErrorCode ierr; 5234 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5235 IS isrowb,iscolb; 5236 Mat *bseq=PETSC_NULL; 5237 5238 PetscFunctionBegin; 5239 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5240 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5241 } 5242 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5243 5244 if (scall == MAT_INITIAL_MATRIX){ 5245 start = A->cmap->rstart; 5246 cmap = a->garray; 5247 nzA = a->A->cmap->n; 5248 nzB = a->B->cmap->n; 5249 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5250 ncols = 0; 5251 for (i=0; i<nzB; i++) { /* row < local row index */ 5252 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5253 else break; 5254 } 5255 imark = i; 5256 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5257 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5258 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5259 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5260 } else { 5261 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5262 isrowb = *rowb; iscolb = *colb; 5263 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5264 bseq[0] = *B_seq; 5265 } 5266 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5267 *B_seq = bseq[0]; 5268 ierr = PetscFree(bseq);CHKERRQ(ierr); 5269 if (!rowb){ 5270 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5271 } else { 5272 *rowb = isrowb; 5273 } 5274 if (!colb){ 5275 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5276 } else { 5277 *colb = iscolb; 5278 } 5279 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5280 PetscFunctionReturn(0); 5281 } 5282 5283 #undef __FUNCT__ 5284 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5285 /* 5286 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5287 of the OFF-DIAGONAL portion of local A 5288 5289 Collective on Mat 5290 5291 Input Parameters: 5292 + A,B - the matrices in mpiaij format 5293 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5294 5295 Output Parameter: 5296 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5297 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5298 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5299 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5300 5301 Level: developer 5302 5303 */ 5304 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5305 { 5306 VecScatter_MPI_General *gen_to,*gen_from; 5307 PetscErrorCode ierr; 5308 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5309 Mat_SeqAIJ *b_oth; 5310 VecScatter ctx=a->Mvctx; 5311 MPI_Comm comm=((PetscObject)ctx)->comm; 5312 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5313 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5314 PetscScalar *rvalues,*svalues; 5315 MatScalar *b_otha,*bufa,*bufA; 5316 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5317 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5318 MPI_Status *sstatus,rstatus; 5319 PetscMPIInt jj; 5320 PetscInt *cols,sbs,rbs; 5321 PetscScalar *vals; 5322 5323 PetscFunctionBegin; 5324 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5325 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5326 } 5327 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5328 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5329 5330 gen_to = (VecScatter_MPI_General*)ctx->todata; 5331 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5332 rvalues = gen_from->values; /* holds the length of receiving row */ 5333 svalues = gen_to->values; /* holds the length of sending row */ 5334 nrecvs = gen_from->n; 5335 nsends = gen_to->n; 5336 5337 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5338 srow = gen_to->indices; /* local row index to be sent */ 5339 sstarts = gen_to->starts; 5340 sprocs = gen_to->procs; 5341 sstatus = gen_to->sstatus; 5342 sbs = gen_to->bs; 5343 rstarts = gen_from->starts; 5344 rprocs = gen_from->procs; 5345 rbs = gen_from->bs; 5346 5347 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5348 if (scall == MAT_INITIAL_MATRIX){ 5349 /* i-array */ 5350 /*---------*/ 5351 /* post receives */ 5352 for (i=0; i<nrecvs; i++){ 5353 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5354 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5355 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5356 } 5357 5358 /* pack the outgoing message */ 5359 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5360 sstartsj[0] = 0; rstartsj[0] = 0; 5361 len = 0; /* total length of j or a array to be sent */ 5362 k = 0; 5363 for (i=0; i<nsends; i++){ 5364 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5365 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5366 for (j=0; j<nrows; j++) { 5367 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5368 for (l=0; l<sbs; l++){ 5369 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5370 rowlen[j*sbs+l] = ncols; 5371 len += ncols; 5372 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5373 } 5374 k++; 5375 } 5376 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5377 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5378 } 5379 /* 
recvs and sends of i-array are completed */ 5380 i = nrecvs; 5381 while (i--) { 5382 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5383 } 5384 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5385 5386 /* allocate buffers for sending j and a arrays */ 5387 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5388 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5389 5390 /* create i-array of B_oth */ 5391 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5392 b_othi[0] = 0; 5393 len = 0; /* total length of j or a array to be received */ 5394 k = 0; 5395 for (i=0; i<nrecvs; i++){ 5396 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5397 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5398 for (j=0; j<nrows; j++) { 5399 b_othi[k+1] = b_othi[k] + rowlen[j]; 5400 len += rowlen[j]; k++; 5401 } 5402 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5403 } 5404 5405 /* allocate space for j and a arrrays of B_oth */ 5406 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5407 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5408 5409 /* j-array */ 5410 /*---------*/ 5411 /* post receives of j-array */ 5412 for (i=0; i<nrecvs; i++){ 5413 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5414 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5415 } 5416 5417 /* pack the outgoing message j-array */ 5418 k = 0; 5419 for (i=0; i<nsends; i++){ 5420 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5421 bufJ = bufj+sstartsj[i]; 5422 for (j=0; j<nrows; j++) { 5423 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5424 for (ll=0; ll<sbs; ll++){ 5425 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5426 for (l=0; l<ncols; l++){ 5427 *bufJ++ = cols[l]; 5428 } 5429 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5430 } 5431 } 5432 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5433 } 5434 5435 /* recvs and sends of j-array are completed */ 5436 i = nrecvs; 5437 while (i--) { 5438 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5439 } 5440 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5441 } else if (scall == MAT_REUSE_MATRIX){ 5442 sstartsj = *startsj_s; 5443 rstartsj = *startsj_r; 5444 bufa = *bufa_ptr; 5445 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5446 b_otha = b_oth->a; 5447 } else { 5448 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5449 } 5450 5451 /* a-array */ 5452 /*---------*/ 5453 /* post receives of a-array */ 5454 for (i=0; i<nrecvs; i++){ 5455 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5456 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5457 } 5458 5459 /* pack the outgoing message a-array */ 5460 k = 0; 5461 for (i=0; i<nsends; i++){ 5462 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5463 bufA = bufa+sstartsj[i]; 5464 for (j=0; j<nrows; j++) { 5465 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5466 for (ll=0; ll<sbs; ll++){ 5467 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5468 for (l=0; l<ncols; l++){ 5469 *bufA++ = vals[l]; 5470 } 5471 ierr = 
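/* this row's values are now packed into the send buffer, so the row can be returned */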
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5472 } 5473 } 5474 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5475 } 5476 /* recvs and sends of a-array are completed */ 5477 i = nrecvs; 5478 while (i--) { 5479 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5480 } 5481 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5482 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5483 5484 if (scall == MAT_INITIAL_MATRIX){ 5485 /* put together the new matrix */ 5486 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5487 5488 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5489 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5490 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5491 b_oth->free_a = PETSC_TRUE; 5492 b_oth->free_ij = PETSC_TRUE; 5493 b_oth->nonew = 0; 5494 5495 ierr = PetscFree(bufj);CHKERRQ(ierr); 5496 if (!startsj_s || !bufa_ptr){ 5497 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5498 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5499 } else { 5500 *startsj_s = sstartsj; 5501 *startsj_r = rstartsj; 5502 *bufa_ptr = bufa; 5503 } 5504 } 5505 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5506 PetscFunctionReturn(0); 5507 } 5508 5509 #undef __FUNCT__ 5510 #define __FUNCT__ "MatGetCommunicationStructs" 5511 /*@C 5512 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5513 5514 Not Collective 5515 5516 Input Parameters: 5517 . A - The matrix in mpiaij format 5518 5519 Output Parameter: 5520 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5521 . 
colmap - A map from global column index to local index into lvec 5522 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5523 5524 Level: developer 5525 5526 @*/ 5527 #if defined (PETSC_USE_CTABLE) 5528 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5529 #else 5530 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5531 #endif 5532 { 5533 Mat_MPIAIJ *a; 5534 5535 PetscFunctionBegin; 5536 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5537 PetscValidPointer(lvec, 2); 5538 PetscValidPointer(colmap, 3); 5539 PetscValidPointer(multScatter, 4); 5540 a = (Mat_MPIAIJ *) A->data; 5541 if (lvec) *lvec = a->lvec; 5542 if (colmap) *colmap = a->colmap; 5543 if (multScatter) *multScatter = a->Mvctx; 5544 PetscFunctionReturn(0); 5545 } 5546 5547 EXTERN_C_BEGIN 5548 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5549 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5550 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5551 EXTERN_C_END 5552 5553 #undef __FUNCT__ 5554 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5555 /* 5556 Computes (B'*A')' since computing B*A directly is untenable 5557 5558 n p p 5559 ( ) ( ) ( ) 5560 m ( A ) * n ( B ) = m ( C ) 5561 ( ) ( ) ( ) 5562 5563 */ 5564 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5565 { 5566 PetscErrorCode ierr; 5567 Mat At,Bt,Ct; 5568 5569 PetscFunctionBegin; 5570 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5571 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5572 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5573 ierr = MatDestroy(&At);CHKERRQ(ierr); 5574 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5575 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5576 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5577 PetscFunctionReturn(0); 5578 } 5579 5580 #undef __FUNCT__ 5581 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5582 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5583 { 5584 PetscErrorCode ierr; 5585 PetscInt m=A->rmap->n,n=B->cmap->n; 5586 Mat Cmat; 5587 5588 PetscFunctionBegin; 5589 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5590 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5591 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5592 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5593 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5594 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5595 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5596 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5597 5598 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5599 *C = Cmat; 5600 PetscFunctionReturn(0); 5601 } 5602 5603 /* ----------------------------------------------------------------*/ 5604 #undef __FUNCT__ 5605 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5606 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5607 { 5608 PetscErrorCode ierr; 5609 5610 PetscFunctionBegin; 5611 if (scall == MAT_INITIAL_MATRIX){ 5612 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5613 } 5614 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
5615 PetscFunctionReturn(0); 5616 } 5617 5618 EXTERN_C_BEGIN 5619 #if defined(PETSC_HAVE_MUMPS) 5620 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5621 #endif 5622 #if defined(PETSC_HAVE_PASTIX) 5623 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5624 #endif 5625 #if defined(PETSC_HAVE_SUPERLU_DIST) 5626 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5627 #endif 5628 #if defined(PETSC_HAVE_CLIQUE) 5629 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5630 #endif 5631 EXTERN_C_END 5632 5633 /*MC 5634 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5635 5636 Options Database Keys: 5637 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5638 5639 Level: beginner 5640 5641 .seealso: MatCreateAIJ() 5642 M*/ 5643 5644 EXTERN_C_BEGIN 5645 #undef __FUNCT__ 5646 #define __FUNCT__ "MatCreate_MPIAIJ" 5647 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5648 { 5649 Mat_MPIAIJ *b; 5650 PetscErrorCode ierr; 5651 PetscMPIInt size; 5652 5653 PetscFunctionBegin; 5654 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5655 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5656 B->data = (void*)b; 5657 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5658 B->assembled = PETSC_FALSE; 5659 B->insertmode = NOT_SET_VALUES; 5660 b->size = size; 5661 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5662 5663 /* build cache for off array entries formed */ 5664 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5665 b->donotstash = PETSC_FALSE; 5666 b->colmap = 0; 5667 b->garray = 0; 5668 b->roworiented = PETSC_TRUE; 5669 5670 /* stuff used for matrix vector multiply */ 5671 b->lvec = PETSC_NULL; 5672 b->Mvctx = PETSC_NULL; 5673 5674 /* stuff for MatGetRow() */ 5675 b->rowindices = 0; 5676 b->rowvalues = 0; 5677 b->getrowactive = PETSC_FALSE; 5678 5679 /* flexible pointer used in CUSP/CUSPARSE classes */ 5680 b->spptr = PETSC_NULL; 5681 5682 #if defined(PETSC_HAVE_MUMPS) 5683 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5684 "MatGetFactor_aij_mumps", 5685 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5686 #endif 5687 #if defined(PETSC_HAVE_PASTIX) 5688 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5689 "MatGetFactor_mpiaij_pastix", 5690 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5691 #endif 5692 #if defined(PETSC_HAVE_SUPERLU_DIST) 5693 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5694 "MatGetFactor_mpiaij_superlu_dist", 5695 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5696 #endif 5697 #if defined(PETSC_HAVE_CLIQUE) 5698 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5699 "MatGetFactor_aij_clique", 5700 MatGetFactor_aij_clique);CHKERRQ(ierr); 5701 #endif 5702 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5703 "MatStoreValues_MPIAIJ", 5704 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5705 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5706 "MatRetrieveValues_MPIAIJ", 5707 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5708 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5709 "MatGetDiagonalBlock_MPIAIJ", 5710 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5711 ierr = 
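/* compose the remaining MPIAIJ-specific methods and conversions so the generic Mat interfaces can dispatch to this type */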
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5712 "MatIsTranspose_MPIAIJ", 5713 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5714 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5715 "MatMPIAIJSetPreallocation_MPIAIJ", 5716 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5717 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5718 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5719 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5720 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5721 "MatDiagonalScaleLocal_MPIAIJ", 5722 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5723 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5724 "MatConvert_MPIAIJ_MPIAIJPERM", 5725 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5726 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5727 "MatConvert_MPIAIJ_MPIAIJCRL", 5728 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5729 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5730 "MatConvert_MPIAIJ_MPISBAIJ", 5731 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5732 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5733 "MatMatMult_MPIDense_MPIAIJ", 5734 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5735 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5736 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5737 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5738 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5739 "MatMatMultNumeric_MPIDense_MPIAIJ", 5740 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5741 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5742 PetscFunctionReturn(0); 5743 } 5744 EXTERN_C_END 5745 5746 #undef __FUNCT__ 5747 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5748 /*@ 5749 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5750 and "off-diagonal" part of the matrix in CSR format. 5751 5752 Collective on MPI_Comm 5753 5754 Input Parameters: 5755 + comm - MPI communicator 5756 . m - number of local rows (Cannot be PETSC_DECIDE) 5757 . n - This value should be the same as the local size used in creating the 5758 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5759 calculated if N is given) For square matrices n is almost always m. 5760 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5761 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5762 . i - row indices for "diagonal" portion of matrix 5763 . j - column indices 5764 . a - matrix values 5765 . oi - row indices for "off-diagonal" portion of matrix 5766 . oj - column indices 5767 - oa - matrix values 5768 5769 Output Parameter: 5770 . mat - the matrix 5771 5772 Level: advanced 5773 5774 Notes: 5775 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5776 must free the arrays once the matrix has been destroyed and not before. 5777 5778 The i and j indices are 0 based 5779 5780 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5781 5782 This sets local rows and cannot be used to set off-processor values. 
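   A minimal sketch of a call (illustrative only: it assumes exactly two processes, one row per process, and that the arrays outlive the matrix):
.vb
      PetscMPIInt rank;
      PetscInt    i[2]  = {0,1}, j[1] = {0};   /* one nonzero in the 1x1 "diagonal" block, local column 0 */
      PetscScalar a[1]  = {2.0};
      PetscInt    oi[2] = {0,1}, oj[1];        /* one nonzero in the "off-diagonal" block */
      PetscScalar oa[1] = {-1.0};
      Mat         mat;

      ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
      oj[0] = rank ? 0 : 1;                    /* global column owned by the other process */
      ierr = MatCreateMPIAIJWithSplitArrays(comm,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve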
5783 5784 You cannot later use MatSetValues() to change values in this matrix. 5785 5786 .keywords: matrix, aij, compressed row, sparse, parallel 5787 5788 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5789 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5790 @*/ 5791 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5792 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5793 { 5794 PetscErrorCode ierr; 5795 Mat_MPIAIJ *maij; 5796 5797 PetscFunctionBegin; 5798 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5799 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5800 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5801 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5802 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5803 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5804 maij = (Mat_MPIAIJ*) (*mat)->data; 5805 maij->donotstash = PETSC_TRUE; 5806 (*mat)->preallocated = PETSC_TRUE; 5807 5808 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5809 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5810 5811 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5812 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5813 5814 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5815 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5816 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5817 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5818 5819 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5820 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5821 PetscFunctionReturn(0); 5822 } 5823 5824 /* 5825 Special version for direct calls from Fortran 5826 */ 5827 #include <petsc-private/fortranimpl.h> 5828 5829 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5830 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5831 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5832 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5833 #endif 5834 5835 /* Change these macros so can be used in void function */ 5836 #undef CHKERRQ 5837 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5838 #undef SETERRQ2 5839 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5840 #undef SETERRQ3 5841 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5842 #undef SETERRQ 5843 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5844 5845 EXTERN_C_BEGIN 5846 #undef __FUNCT__ 5847 #define __FUNCT__ "matsetvaluesmpiaij_" 5848 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5849 { 5850 Mat mat = *mmat; 5851 PetscInt m = *mm, n = *mn; 5852 InsertMode addv = *maddv; 5853 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5854 PetscScalar value; 5855 PetscErrorCode ierr; 5856 5857 MatCheckPreallocated(mat,1); 5858 if (mat->insertmode == NOT_SET_VALUES) { 5859 mat->insertmode = addv; 5860 } 5861 #if defined(PETSC_USE_DEBUG) 5862 else if (mat->insertmode != addv) { 5863 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5864 } 5865 #endif 5866 { 5867 PetscInt 
i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5868 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5869 PetscBool roworiented = aij->roworiented; 5870 5871 /* Some Variables required in the macro */ 5872 Mat A = aij->A; 5873 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5874 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5875 MatScalar *aa = a->a; 5876 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5877 Mat B = aij->B; 5878 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5879 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5880 MatScalar *ba = b->a; 5881 5882 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5883 PetscInt nonew = a->nonew; 5884 MatScalar *ap1,*ap2; 5885 5886 PetscFunctionBegin; 5887 for (i=0; i<m; i++) { 5888 if (im[i] < 0) continue; 5889 #if defined(PETSC_USE_DEBUG) 5890 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5891 #endif 5892 if (im[i] >= rstart && im[i] < rend) { 5893 row = im[i] - rstart; 5894 lastcol1 = -1; 5895 rp1 = aj + ai[row]; 5896 ap1 = aa + ai[row]; 5897 rmax1 = aimax[row]; 5898 nrow1 = ailen[row]; 5899 low1 = 0; 5900 high1 = nrow1; 5901 lastcol2 = -1; 5902 rp2 = bj + bi[row]; 5903 ap2 = ba + bi[row]; 5904 rmax2 = bimax[row]; 5905 nrow2 = bilen[row]; 5906 low2 = 0; 5907 high2 = nrow2; 5908 5909 for (j=0; j<n; j++) { 5910 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5911 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5912 if (in[j] >= cstart && in[j] < cend){ 5913 col = in[j] - cstart; 5914 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5915 } else if (in[j] < 0) continue; 5916 #if defined(PETSC_USE_DEBUG) 5917 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5918 #endif 5919 else { 5920 if (mat->was_assembled) { 5921 if (!aij->colmap) { 5922 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5923 } 5924 #if defined (PETSC_USE_CTABLE) 5925 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5926 col--; 5927 #else 5928 col = aij->colmap[in[j]] - 1; 5929 #endif 5930 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5931 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5932 col = in[j]; 5933 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5934 B = aij->B; 5935 b = (Mat_SeqAIJ*)B->data; 5936 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5937 rp2 = bj + bi[row]; 5938 ap2 = ba + bi[row]; 5939 rmax2 = bimax[row]; 5940 nrow2 = bilen[row]; 5941 low2 = 0; 5942 high2 = nrow2; 5943 bm = aij->B->rmap->n; 5944 ba = b->a; 5945 } 5946 } else col = in[j]; 5947 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5948 } 5949 } 5950 } else { 5951 if (!aij->donotstash) { 5952 if (roworiented) { 5953 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5954 } else { 5955 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5956 } 5957 } 5958 } 5959 }} 5960 PetscFunctionReturnVoid(); 5961 } 5962 EXTERN_C_END 5963 5964
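/*
   Usage sketch for the MatCreateAIJ() interface documented earlier in this file.
   Illustrative only: comm, M, and N are assumed to be defined by the caller, and
   the preallocation guesses (5 diagonal and 2 off-diagonal nonzeros per row) are
   placeholders, not recommendations.

      Mat A;
      ierr = MatCreateAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      (insert entries with MatSetValues(), then assemble)
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/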