#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL;
   the type also automatically switches over to using inode routines when enough inodes exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
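/*
   Illustrative sketch (added for exposition, not part of the library logic): the usual way
   to create and preallocate a MATAIJ matrix, calling both preallocation routines as the
   manual page above recommends. The global size 100 and the per-row nonzero estimates
   5 and 2 are placeholders.

       Mat            A;
       PetscErrorCode ierr;

       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
       ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
       ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);            (takes effect on one process)
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr); (takes effect on several)
       ... set values, then MatAssemblyBegin()/MatAssemblyEnd() ...
       ierr = MatDestroy(&A);CHKERRQ(ierr);
*/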
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = PETSC_NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
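/*
   Illustrative sketch (not part of the original source): computing all column norms of an
   assembled parallel matrix through the public interface that dispatches to the routine
   above. Each process accumulates its local contributions into a global-length work array
   and a single MPI_Allreduce combines them, so norms must have length equal to the global
   number of columns on every process.

       PetscInt  N;
       PetscReal *norms;

       ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
       ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
       ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
       ierr = PetscFree(norms);CHKERRQ(ierr);
*/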
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the numbers of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the numbers of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
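/*
   Illustrative sketch (not part of the original source) of how the routine above is meant
   to be driven: process 0 holds a SeqAIJ matrix gmat, every process receives its m local
   rows, and a second call with MAT_REUSE_MATRIX moves only the numerical values. This is
   an internal routine, so the sketch assumes a prototype is visible (e.g. via mpiaij.h).

       Mat      gmat;    sequential matrix, significant only on rank 0
       Mat      dmat;    the distributed result
       PetscInt m;       number of locally owned rows, summing to the global size

       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
       ... change the numerical values of gmat on rank 0, keeping its nonzero pattern ...
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/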
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
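/*
   The two macros above implement the same idea: a binary search narrows [low,high) to a
   window of at most five entries, a linear scan covers that window, and on a miss the new
   entry is shifted into the sorted row. A minimal standalone sketch of the technique
   (illustrative only; it assumes the arrays have room for one more entry, while the macros
   additionally handle reallocation, zero-entry filtering, and the nonew flags):

       static void InsertSorted(PetscInt *cols,PetscScalar *vals,PetscInt *n,PetscInt col,PetscScalar v)
       {
         PetscInt low = 0,high = *n,t,i;
         while (high-low > 5) {              (binary search phase)
           t = (low+high)/2;
           if (cols[t] > col) high = t;
           else               low  = t;
         }
         for (i=low; i<high; i++) {          (linear scan of the small window)
           if (cols[i] > col) break;
           if (cols[i] == col) { vals[i] = v; return; }
         }
         for (t=*n; t>i; t--) {              (shift later entries up one slot)
           cols[t] = cols[t-1];
           vals[t] = vals[t-1];
         }
         cols[i] = col; vals[i] = v; (*n)++;
       }
*/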
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
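/*
   Illustrative sketch (not part of the original source): MatSetValuesRow() takes the
   values for one locally owned row in the order the row's nonzeros are stored, which the
   copy logic above unpacks as off-diagonal entries left of the diagonal block, then the
   diagonal block, then the remaining off-diagonal entries. The row number and value array
   here are placeholders.

       PetscInt    row = rstart;      (some locally owned global row)
       PetscScalar vals[] = {...};    (one value per stored nonzero of that row)

       ierr = MatSetValuesRow(A,row,vals);CHKERRQ(ierr);
*/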
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
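/*
   Illustrative sketch (not part of the original source): entries destined for rows owned
   by another process are placed in the stash by MatSetValues_MPIAIJ() above and only
   communicated during assembly, so generating values off-process is legal as long as
   assembly follows. The indices here are placeholders.

       PetscInt    row = 0;           (may be owned by another process)
       PetscInt    col = 0;
       PetscScalar v   = 1.0;

       ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/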
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
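/*
   Illustrative sketch (not part of the original source): zeroing a set of global rows,
   placing diag on the diagonal, and fixing the right hand side b so that x continues to
   satisfy the zeroed equations; rows owned by other processes are routed by the
   communication in the routine below. The row numbers are placeholders.

       PetscInt    rows[] = {0,7};    (global row numbers, need not be locally owned)
       PetscScalar diag   = 1.0;

       ierr = MatZeroRows(A,2,rows,diag,x,b);CHKERRQ(ierr);
*/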
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory, calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
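/*
   Illustrative sketch (not part of the original source): the symmetric variant zeroes the
   listed rows and the matching columns. Because column entries live on other processes,
   the implementation below scatters a 0/1 mask of zeroed rows to mark which off-diagonal
   columns must be eliminated. The row number is a placeholder.

       PetscInt    rows[] = {3};
       PetscScalar diag   = 1.0;

       ierr = MatZeroRowsColumns(A,1,rows,diag,x,b);CHKERRQ(ierr);
*/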
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
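/*
   Illustrative sketch (not part of the original source): the multiply routines above
   overlap communication with computation: the scatter of the needed off-process entries
   of xx into a->lvec is started, the purely local product with the diagonal block is done,
   and the off-diagonal product is added once the scatter completes. From user code this
   is simply:

       Vec x,y;

       ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
       ierr = VecSet(x,1.0);CHKERRQ(ierr);
       ierr = MatMult(A,x,y);CHKERRQ(ierr);
       ierr = VecDestroy(&x);CHKERRQ(ierr);
       ierr = VecDestroy(&y);CHKERRQ(ierr);
*/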
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
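/*
   Illustrative sketch (not part of the original source): both operations above act
   entirely locally, the diagonal because the row and column partitions must match, the
   scaling because every stored entry is owned by exactly one process.

       Vec d;

       ierr = MatGetVecs(A,PETSC_NULL,&d);CHKERRQ(ierr);
       ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
       ierr = MatScale(A,2.0);CHKERRQ(ierr);
       ierr = VecDestroy(&d);CHKERRQ(ierr);
*/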
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
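/*
   Illustrative sketch (not part of the original source): the binary writer below is what
   MatView() reaches for a parallel AIJ matrix with a binary viewer, and MatLoad() reads
   the same format back. The file name is a placeholder.

       Mat         B;
       PetscViewer viewer;

       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
       ierr = MatView(A,viewer);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

       ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
       ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr);
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
       ierr = MatLoad(B,viewer);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/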
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest any processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }

  PetscFunctionReturn(0);
}
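/*
   Illustrative sketch (not part of the original source): the INFO and INFO_DETAIL branches
   handled below are selected through the viewer format, e.g.

       ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
       ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   or through the corresponding command line viewing options.
*/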
\n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1494 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1495 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1496 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1497 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1500 PetscInt inodecount,inodelimit,*inodes; 1501 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1502 if (inodes) { 1503 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1504 } else { 1505 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1506 } 1507 PetscFunctionReturn(0); 1508 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1509 PetscFunctionReturn(0); 1510 } 1511 } else if (isbinary) { 1512 if (size == 1) { 1513 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1514 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1515 } else { 1516 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1517 } 1518 PetscFunctionReturn(0); 1519 } else if (isdraw) { 1520 PetscDraw draw; 1521 PetscBool isnull; 1522 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1523 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1524 } 1525 1526 if (size == 1) { 1527 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1528 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1529 } else { 1530 /* assemble the entire matrix onto first processor. 
*/ 1531 Mat A; 1532 Mat_SeqAIJ *Aloc; 1533 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1534 MatScalar *a; 1535 1536 if (mat->rmap->N > 1024) { 1537 PetscBool flg = PETSC_FALSE; 1538 1539 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1540 if (!flg) { 1541 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1542 } 1543 } 1544 1545 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1546 if (!rank) { 1547 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1548 } else { 1549 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1550 } 1551 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1552 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1553 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1554 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1555 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1556 1557 /* copy over the A part */ 1558 Aloc = (Mat_SeqAIJ*)aij->A->data; 1559 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1560 row = mat->rmap->rstart; 1561 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1562 for (i=0; i<m; i++) { 1563 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1564 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1565 } 1566 aj = Aloc->j; 1567 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1568 1569 /* copy over the B part */ 1570 Aloc = (Mat_SeqAIJ*)aij->B->data; 1571 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1572 row = mat->rmap->rstart; 1573 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1574 ct = cols; 1575 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1576 for (i=0; i<m; i++) { 1577 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1578 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1579 } 1580 ierr = PetscFree(ct);CHKERRQ(ierr); 1581 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1582 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1583 /* 1584 Everyone has to call to draw the matrix since the graphics waits are 1585 synchronized across all processors that share the PetscDraw object 1586 */ 1587 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1588 if (!rank) { 1589 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1590 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1591 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1592 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1593 } 1594 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1595 ierr = MatDestroy(&A);CHKERRQ(ierr); 1596 } 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatView_MPIAIJ" 1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1603 { 1604 PetscErrorCode ierr; 1605 PetscBool iascii,isdraw,issocket,isbinary; 1606 1607 PetscFunctionBegin; 1608 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1609 ierr = 
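/*
   Aside (usage sketch, not library code): callers reach the viewer routines in this
   file through MatView(). For example, assuming mat is an assembled MPIAIJ matrix:
*/
#if 0
  PetscViewer bview;
  ierr = PetscViewerBinaryOpen(((PetscObject)mat)->comm,"mat.dat",FILE_MODE_WRITE,&bview);CHKERRQ(ierr);
  ierr = MatView(mat,bview);CHKERRQ(ierr);                      /* takes the MatView_MPIAIJ_Binary() path */
  ierr = PetscViewerDestroy(&bview);CHKERRQ(ierr);
  ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);  /* takes the ASCII path above */
#endif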
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1610 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1611 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1612 if (iascii || isdraw || isbinary || issocket) { 1613 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1614 } else { 1615 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1616 } 1617 PetscFunctionReturn(0); 1618 } 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "MatSOR_MPIAIJ" 1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1623 { 1624 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1625 PetscErrorCode ierr; 1626 Vec bb1 = 0; 1627 PetscBool hasop; 1628 1629 PetscFunctionBegin; 1630 if (flag == SOR_APPLY_UPPER) { 1631 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1636 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1637 } 1638 1639 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1640 if (flag & SOR_ZERO_INITIAL_GUESS) { 1641 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1642 its--; 1643 } 1644 1645 while (its--) { 1646 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1648 1649 /* update rhs: bb1 = bb - B*x */ 1650 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1651 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1652 1653 /* local sweep */ 1654 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1655 } 1656 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1657 if (flag & SOR_ZERO_INITIAL_GUESS) { 1658 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1659 its--; 1660 } 1661 while (its--) { 1662 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1664 1665 /* update rhs: bb1 = bb - B*x */ 1666 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1667 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1668 1669 /* local sweep */ 1670 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1671 } 1672 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1673 if (flag & SOR_ZERO_INITIAL_GUESS) { 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1675 its--; 1676 } 1677 while (its--) { 1678 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1679 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1680 1681 /* update rhs: bb1 = bb - B*x */ 1682 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1683 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1684 1685 /* local sweep */ 1686 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1687 } 1688 } else if (flag & SOR_EISENSTAT) { 1689 Vec xx1; 1690 1691 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1692 ierr = 
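/*
   Aside (editorial note with a hedged usage sketch): every parallel branch of
   MatSOR_MPIAIJ() above has the same shape: gather the ghost values of xx into lvec,
   fold the off-diagonal block into the right-hand side as bb1 = bb - B*lvec, then run
   a purely local sweep on the diagonal block A. Callers normally drive these sweeps
   through PCSOR rather than calling MatSOR() directly, for example:
*/
#if 0
  KSP ksp;
  PC  pc;
  ierr = KSPCreate(((PetscObject)matin)->comm,&ksp);CHKERRQ(ierr);
  ierr = KSPSetOperators(ksp,matin,matin,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = KSPSetType(ksp,KSPRICHARDSON);CHKERRQ(ierr);
  ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
  ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);                         /* applies MatSOR_MPIAIJ() */
  ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
  ierr = KSPSolve(ksp,bb,xx);CHKERRQ(ierr);                         /* bb,xx assumed assembled */
  ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
#endif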
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1693 1694 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1695 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1696 if (!mat->diag) { 1697 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1698 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1699 } 1700 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1701 if (hasop) { 1702 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1703 } else { 1704 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1705 } 1706 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1707 1708 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1709 1710 /* local sweep */ 1711 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1712 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1713 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1714 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1715 1716 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatPermute_MPIAIJ" 1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1723 { 1724 Mat aA,aB,Aperm; 1725 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1726 PetscScalar *aa,*ba; 1727 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1728 PetscSF rowsf,sf; 1729 IS parcolp = PETSC_NULL; 1730 PetscBool done; 1731 PetscErrorCode ierr; 1732 1733 PetscFunctionBegin; 1734 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1735 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1736 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1737 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1738 1739 /* Invert row permutation to find out where my rows should go */ 1740 ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr); 1741 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1742 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1743 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1744 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1745 1746 /* Invert column permutation to find out where my columns should go */ 1747 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1748 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1749 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1750 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1751 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1752 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1753 1754 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1755 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1756 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1757 1758 /* Find out where my gcols should go */ 1759 ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr); 1760 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1761 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1762 ierr = 
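/*
   Aside (editorial sketch): the PetscSF reduces above invert a permutation without any
   hand-written MPI: each process fills work[] with its own global indices and pushes
   them into the slots named by the permutation, so the owner of slot p receives the
   index that should map to p. The serial analogue of the row inversion, with the
   hypothetical array inv[] receiving the inverse map, is just:
*/
#if 0
  /* rwant[i] = global row that local row i should become */
  for (i=0; i<m; i++) work[i] = rstart + i;
  for (i=0; i<m; i++) inv[rwant[i]] = work[i];   /* PetscSFReduce performs this scatter across ranks */
#endif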
PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1763 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1764 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1765 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1766
1767 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1768 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1769 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1770 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1771 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1772 for (i=0; i<m; i++) {
1773 PetscInt row = rdest[i],rowner;
1774 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1775 for (j=ai[i]; j<ai[i+1]; j++) {
1776 PetscInt cowner,col = cdest[aj[j]];
1777 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1778 if (rowner == cowner) dnnz[i]++;
1779 else onnz[i]++;
1780 }
1781 for (j=bi[i]; j<bi[i+1]; j++) {
1782 PetscInt cowner,col = gcdest[bj[j]];
1783 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1784 if (rowner == cowner) dnnz[i]++;
1785 else onnz[i]++;
1786 }
1787 }
1788 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1789 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1790 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1791 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1792 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1793 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1794 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1795
1796 ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1797 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1798 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1799 for (i=0; i<m; i++) {
1800 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1801 PetscInt rowlen;
1802 rowlen = ai[i+1] - ai[i];
1803 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1804 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1805 rowlen = bi[i+1] - bi[i];
1806 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1807 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1808 }
1809 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1810 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1811 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1812 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1813 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1814 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1815 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1816 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1817 ierr = PetscFree(gcdest);CHKERRQ(ierr);
1818 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1819 *B = Aperm;
1820 PetscFunctionReturn(0);
1821 }
1822
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1825 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1826 {
1827 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1828 Mat A = mat->A,B = mat->B;
1829 PetscErrorCode ierr;
1830 PetscReal
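/*
   Aside (usage sketch for MatPermute(); the identity permutation is chosen purely
   for illustration, any parallel IS of the right sizes works):
*/
#if 0
  IS  rowp,colp;
  Mat Aperm;
  ierr = ISCreateStride(((PetscObject)A)->comm,A->rmap->n,A->rmap->rstart,1,&rowp);CHKERRQ(ierr);
  ierr = ISCreateStride(((PetscObject)A)->comm,A->cmap->n,A->cmap->rstart,1,&colp);CHKERRQ(ierr);
  ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);   /* dispatches to MatPermute_MPIAIJ() */
  ierr = ISDestroy(&rowp);CHKERRQ(ierr);
  ierr = ISDestroy(&colp);CHKERRQ(ierr);
  ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
#endif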
isend[5],irecv[5]; 1831 1832 PetscFunctionBegin; 1833 info->block_size = 1.0; 1834 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1835 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1836 isend[3] = info->memory; isend[4] = info->mallocs; 1837 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1838 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1839 isend[3] += info->memory; isend[4] += info->mallocs; 1840 if (flag == MAT_LOCAL) { 1841 info->nz_used = isend[0]; 1842 info->nz_allocated = isend[1]; 1843 info->nz_unneeded = isend[2]; 1844 info->memory = isend[3]; 1845 info->mallocs = isend[4]; 1846 } else if (flag == MAT_GLOBAL_MAX) { 1847 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1848 info->nz_used = irecv[0]; 1849 info->nz_allocated = irecv[1]; 1850 info->nz_unneeded = irecv[2]; 1851 info->memory = irecv[3]; 1852 info->mallocs = irecv[4]; 1853 } else if (flag == MAT_GLOBAL_SUM) { 1854 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1855 info->nz_used = irecv[0]; 1856 info->nz_allocated = irecv[1]; 1857 info->nz_unneeded = irecv[2]; 1858 info->memory = irecv[3]; 1859 info->mallocs = irecv[4]; 1860 } 1861 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1862 info->fill_ratio_needed = 0; 1863 info->factor_mallocs = 0; 1864 PetscFunctionReturn(0); 1865 } 1866 1867 #undef __FUNCT__ 1868 #define __FUNCT__ "MatSetOption_MPIAIJ" 1869 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1870 { 1871 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1872 PetscErrorCode ierr; 1873 1874 PetscFunctionBegin; 1875 switch (op) { 1876 case MAT_NEW_NONZERO_LOCATIONS: 1877 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1878 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1879 case MAT_KEEP_NONZERO_PATTERN: 1880 case MAT_NEW_NONZERO_LOCATION_ERR: 1881 case MAT_USE_INODES: 1882 case MAT_IGNORE_ZERO_ENTRIES: 1883 MatCheckPreallocated(A,1); 1884 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1885 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1886 break; 1887 case MAT_ROW_ORIENTED: 1888 a->roworiented = flg; 1889 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1890 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1891 break; 1892 case MAT_NEW_DIAGONALS: 1893 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1894 break; 1895 case MAT_IGNORE_OFF_PROC_ENTRIES: 1896 a->donotstash = flg; 1897 break; 1898 case MAT_SPD: 1899 A->spd_set = PETSC_TRUE; 1900 A->spd = flg; 1901 if (flg) { 1902 A->symmetric = PETSC_TRUE; 1903 A->structurally_symmetric = PETSC_TRUE; 1904 A->symmetric_set = PETSC_TRUE; 1905 A->structurally_symmetric_set = PETSC_TRUE; 1906 } 1907 break; 1908 case MAT_SYMMETRIC: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 case MAT_STRUCTURALLY_SYMMETRIC: 1912 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1913 break; 1914 case MAT_HERMITIAN: 1915 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1916 break; 1917 case MAT_SYMMETRY_ETERNAL: 1918 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1919 break; 1920 default: 1921 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1922 } 1923 PetscFunctionReturn(0); 1924 } 1925 1926 #undef __FUNCT__ 1927 #define __FUNCT__ "MatGetRow_MPIAIJ" 1928 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1929 { 1930 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1931 PetscScalar 
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1932 PetscErrorCode ierr; 1933 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1934 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1935 PetscInt *cmap,*idx_p; 1936 1937 PetscFunctionBegin; 1938 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1939 mat->getrowactive = PETSC_TRUE; 1940 1941 if (!mat->rowvalues && (idx || v)) { 1942 /* 1943 allocate enough space to hold information from the longest row. 1944 */ 1945 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1946 PetscInt max = 1,tmp; 1947 for (i=0; i<matin->rmap->n; i++) { 1948 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1949 if (max < tmp) { max = tmp; } 1950 } 1951 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1952 } 1953 1954 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1955 lrow = row - rstart; 1956 1957 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1958 if (!v) {pvA = 0; pvB = 0;} 1959 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1960 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1961 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1962 nztot = nzA + nzB; 1963 1964 cmap = mat->garray; 1965 if (v || idx) { 1966 if (nztot) { 1967 /* Sort by increasing column numbers, assuming A and B already sorted */ 1968 PetscInt imark = -1; 1969 if (v) { 1970 *v = v_p = mat->rowvalues; 1971 for (i=0; i<nzB; i++) { 1972 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1973 else break; 1974 } 1975 imark = i; 1976 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1977 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1978 } 1979 if (idx) { 1980 *idx = idx_p = mat->rowindices; 1981 if (imark > -1) { 1982 for (i=0; i<imark; i++) { 1983 idx_p[i] = cmap[cworkB[i]]; 1984 } 1985 } else { 1986 for (i=0; i<nzB; i++) { 1987 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1988 else break; 1989 } 1990 imark = i; 1991 } 1992 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1993 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1994 } 1995 } else { 1996 if (idx) *idx = 0; 1997 if (v) *v = 0; 1998 } 1999 } 2000 *nz = nztot; 2001 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 2002 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 2003 PetscFunctionReturn(0); 2004 } 2005 2006 #undef __FUNCT__ 2007 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 2008 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2009 { 2010 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2011 2012 PetscFunctionBegin; 2013 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2014 aij->getrowactive = PETSC_FALSE; 2015 PetscFunctionReturn(0); 2016 } 2017 2018 #undef __FUNCT__ 2019 #define __FUNCT__ "MatNorm_MPIAIJ" 2020 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2021 { 2022 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2023 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2024 PetscErrorCode ierr; 2025 PetscInt i,j,cstart = mat->cmap->rstart; 2026 PetscReal sum = 0.0; 2027 MatScalar *v; 2028 2029 PetscFunctionBegin; 2030 if (aij->size == 1) { 2031 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2032 } else { 2033 if (type == NORM_FROBENIUS) { 2034 
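/*
   Aside (usage sketch): MatGetRow_MPIAIJ() above merges the diagonal and off-diagonal
   pieces of one locally owned row into a single list sorted by global column. The
   caller-side pattern it supports is strictly bracketed, one local row at a time:
*/
#if 0
  PetscInt          row,ncols,rstart,rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... use the ncols global column indices in cols[] and values in vals[] ... */
    ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
#endif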
v = amat->a; 2035 for (i=0; i<amat->nz; i++) { 2036 #if defined(PETSC_USE_COMPLEX) 2037 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2038 #else 2039 sum += (*v)*(*v); v++; 2040 #endif 2041 } 2042 v = bmat->a; 2043 for (i=0; i<bmat->nz; i++) { 2044 #if defined(PETSC_USE_COMPLEX) 2045 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2046 #else 2047 sum += (*v)*(*v); v++; 2048 #endif 2049 } 2050 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2051 *norm = PetscSqrtReal(*norm); 2052 } else if (type == NORM_1) { /* max column norm */ 2053 PetscReal *tmp,*tmp2; 2054 PetscInt *jj,*garray = aij->garray; 2055 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2056 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2057 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2058 *norm = 0.0; 2059 v = amat->a; jj = amat->j; 2060 for (j=0; j<amat->nz; j++) { 2061 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 2062 } 2063 v = bmat->a; jj = bmat->j; 2064 for (j=0; j<bmat->nz; j++) { 2065 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2066 } 2067 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2068 for (j=0; j<mat->cmap->N; j++) { 2069 if (tmp2[j] > *norm) *norm = tmp2[j]; 2070 } 2071 ierr = PetscFree(tmp);CHKERRQ(ierr); 2072 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2073 } else if (type == NORM_INFINITY) { /* max row norm */ 2074 PetscReal ntemp = 0.0; 2075 for (j=0; j<aij->A->rmap->n; j++) { 2076 v = amat->a + amat->i[j]; 2077 sum = 0.0; 2078 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2079 sum += PetscAbsScalar(*v); v++; 2080 } 2081 v = bmat->a + bmat->i[j]; 2082 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2083 sum += PetscAbsScalar(*v); v++; 2084 } 2085 if (sum > ntemp) ntemp = sum; 2086 } 2087 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2088 } else { 2089 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2090 } 2091 } 2092 PetscFunctionReturn(0); 2093 } 2094 2095 #undef __FUNCT__ 2096 #define __FUNCT__ "MatTranspose_MPIAIJ" 2097 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2098 { 2099 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2100 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2101 PetscErrorCode ierr; 2102 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2103 PetscInt cstart=A->cmap->rstart,ncol; 2104 Mat B; 2105 MatScalar *array; 2106 2107 PetscFunctionBegin; 2108 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2109 2110 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2111 ai = Aloc->i; aj = Aloc->j; 2112 bi = Bloc->i; bj = Bloc->j; 2113 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2114 PetscInt *d_nnz,*g_nnz,*o_nnz; 2115 PetscSFNode *oloc; 2116 PetscSF sf; 2117 2118 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2119 /* compute d_nnz for preallocation */ 2120 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2121 for (i=0; i<ai[ma]; i++) { 2122 d_nnz[aj[i]] ++; 2123 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2124 } 2125 /* compute local off-diagonal contributions */ 2126 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2127 for (i=0; 
i<bi[ma]; i++) g_nnz[bj[i]]++; 2128 /* map those to global */ 2129 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 2130 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2131 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2132 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2133 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2134 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2135 2136 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2137 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2138 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2139 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2140 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2141 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2142 } else { 2143 B = *matout; 2144 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2145 for (i=0; i<ai[ma]; i++){ 2146 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2147 } 2148 } 2149 2150 /* copy over the A part */ 2151 array = Aloc->a; 2152 row = A->rmap->rstart; 2153 for (i=0; i<ma; i++) { 2154 ncol = ai[i+1]-ai[i]; 2155 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2156 row++; array += ncol; aj += ncol; 2157 } 2158 aj = Aloc->j; 2159 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2160 2161 /* copy over the B part */ 2162 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2163 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2164 array = Bloc->a; 2165 row = A->rmap->rstart; 2166 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2167 cols_tmp = cols; 2168 for (i=0; i<mb; i++) { 2169 ncol = bi[i+1]-bi[i]; 2170 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2171 row++; array += ncol; cols_tmp += ncol; 2172 } 2173 ierr = PetscFree(cols);CHKERRQ(ierr); 2174 2175 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2176 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2177 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2178 *matout = B; 2179 } else { 2180 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2181 } 2182 PetscFunctionReturn(0); 2183 } 2184 2185 #undef __FUNCT__ 2186 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2187 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2188 { 2189 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2190 Mat a = aij->A,b = aij->B; 2191 PetscErrorCode ierr; 2192 PetscInt s1,s2,s3; 2193 2194 PetscFunctionBegin; 2195 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2196 if (rr) { 2197 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2198 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2199 /* Overlap communication with computation. 
*/ 2200 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2201 } 2202 if (ll) { 2203 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2204 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2205 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2206 } 2207 /* scale the diagonal block */ 2208 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2209 2210 if (rr) { 2211 /* Do a scatter end and then right scale the off-diagonal block */ 2212 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2213 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2214 } 2215 2216 PetscFunctionReturn(0); 2217 } 2218 2219 #undef __FUNCT__ 2220 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2221 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2222 { 2223 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2224 PetscErrorCode ierr; 2225 2226 PetscFunctionBegin; 2227 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2228 PetscFunctionReturn(0); 2229 } 2230 2231 #undef __FUNCT__ 2232 #define __FUNCT__ "MatEqual_MPIAIJ" 2233 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2234 { 2235 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2236 Mat a,b,c,d; 2237 PetscBool flg; 2238 PetscErrorCode ierr; 2239 2240 PetscFunctionBegin; 2241 a = matA->A; b = matA->B; 2242 c = matB->A; d = matB->B; 2243 2244 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2245 if (flg) { 2246 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2247 } 2248 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2249 PetscFunctionReturn(0); 2250 } 2251 2252 #undef __FUNCT__ 2253 #define __FUNCT__ "MatCopy_MPIAIJ" 2254 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2255 { 2256 PetscErrorCode ierr; 2257 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2258 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2259 2260 PetscFunctionBegin; 2261 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2262 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2263 /* because of the column compression in the off-processor part of the matrix a->B, 2264 the number of columns in a->B and b->B may be different, hence we cannot call 2265 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2266 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2267 then copying the submatrices */ 2268 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2269 } else { 2270 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2271 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2272 } 2273 PetscFunctionReturn(0); 2274 } 2275 2276 #undef __FUNCT__ 2277 #define __FUNCT__ "MatSetUp_MPIAIJ" 2278 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2279 { 2280 PetscErrorCode ierr; 2281 2282 PetscFunctionBegin; 2283 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2284 PetscFunctionReturn(0); 2285 } 2286 2287 #undef __FUNCT__ 2288 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2289 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2290 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2291 { 2292 PetscInt i,m=Y->rmap->N; 2293 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2294 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2295 const PetscInt *xi = x->i,*yi = y->i; 2296 2297 PetscFunctionBegin; 2298 /* Set the number of nonzeros in the new matrix */ 2299 for (i=0; i<m; i++) { 2300 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2301 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2302 nnz[i] = 0; 2303 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2304 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2305 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2306 nnz[i]++; 2307 } 2308 for (; k<nzy; k++) nnz[i]++; 2309 } 2310 PetscFunctionReturn(0); 2311 } 2312 2313 #undef __FUNCT__ 2314 #define __FUNCT__ "MatAXPY_MPIAIJ" 2315 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2316 { 2317 PetscErrorCode ierr; 2318 PetscInt i; 2319 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2320 PetscBLASInt bnz,one=1; 2321 Mat_SeqAIJ *x,*y; 2322 2323 PetscFunctionBegin; 2324 if (str == SAME_NONZERO_PATTERN) { 2325 PetscScalar alpha = a; 2326 x = (Mat_SeqAIJ *)xx->A->data; 2327 y = (Mat_SeqAIJ *)yy->A->data; 2328 bnz = PetscBLASIntCast(x->nz); 2329 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2330 x = (Mat_SeqAIJ *)xx->B->data; 2331 y = (Mat_SeqAIJ *)yy->B->data; 2332 bnz = PetscBLASIntCast(x->nz); 2333 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2334 } else if (str == SUBSET_NONZERO_PATTERN) { 2335 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2336 2337 x = (Mat_SeqAIJ *)xx->B->data; 2338 y = (Mat_SeqAIJ *)yy->B->data; 2339 if (y->xtoy && y->XtoY != xx->B) { 2340 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2341 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2342 } 2343 if (!y->xtoy) { /* get xtoy */ 2344 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2345 y->XtoY = xx->B; 2346 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2347 } 2348 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2349 } else { 2350 Mat B; 2351 PetscInt *nnz_d,*nnz_o; 2352 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2353 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2354 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2355 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2356 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2357 ierr = 
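/*
   Aside (editorial sketch): MatAXPYGetPreallocation_MPIAIJ() above is a sorted-merge
   count: for each row it walks the globally mapped column lists of X and Y in tandem
   and counts their union, which is exactly the row length the sum Y+a*X needs. For a
   single row with sorted index arrays xcols[0..nzx) and ycols[0..nzy) (names
   hypothetical), the count is:
*/
#if 0
  PetscInt j = 0,k = 0,nnz = 0;
  while (j < nzx && k < nzy) {
    if (xcols[j] < ycols[k])      {nnz++; j++;}
    else if (xcols[j] > ycols[k]) {nnz++; k++;}
    else                          {nnz++; j++; k++;}  /* shared column counted once */
  }
  nnz += (nzx - j) + (nzy - k);                       /* tail of whichever list remains */
#endif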
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2358 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2359 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2360 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2361 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2362 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2363 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2364 ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2365 ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2366 }
2367 PetscFunctionReturn(0);
2368 }
2369
2370 extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2371
2372 #undef __FUNCT__
2373 #define __FUNCT__ "MatConjugate_MPIAIJ"
2374 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2375 {
2376 #if defined(PETSC_USE_COMPLEX)
2377 PetscErrorCode ierr;
2378 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2379
2380 PetscFunctionBegin;
2381 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2382 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2383 #else
2384 PetscFunctionBegin;
2385 #endif
2386 PetscFunctionReturn(0);
2387 }
2388
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatRealPart_MPIAIJ"
2391 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2392 {
2393 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2394 PetscErrorCode ierr;
2395
2396 PetscFunctionBegin;
2397 ierr = MatRealPart(a->A);CHKERRQ(ierr);
2398 ierr = MatRealPart(a->B);CHKERRQ(ierr);
2399 PetscFunctionReturn(0);
2400 }
2401
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2404 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2405 {
2406 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2407 PetscErrorCode ierr;
2408
2409 PetscFunctionBegin;
2410 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2411 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2412 PetscFunctionReturn(0);
2413 }
2414
2415 #ifdef PETSC_HAVE_PBGL
2416
2417 #include <boost/parallel/mpi/bsp_process_group.hpp>
2418 #include <boost/graph/distributed/ilu_default_graph.hpp>
2419 #include <boost/graph/distributed/ilu_0_block.hpp>
2420 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2421 #include <boost/graph/distributed/petsc/interface.hpp>
2422 #include <boost/multi_array.hpp>
2423 #include <boost/parallel/distributed_property_map.hpp>
2424
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2427 /*
2428 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2429 */
2430 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2431 {
2432 namespace petsc = boost::distributed::petsc;
2433
2434 namespace graph_dist = boost::graph::distributed;
2435 using boost::graph::distributed::ilu_default::process_group_type;
2436 using boost::graph::ilu_permuted;
2437
2438 PetscBool row_identity, col_identity;
2439 PetscContainer c;
2440 PetscInt m, n, M, N;
2441 PetscErrorCode ierr;
2442
2443 PetscFunctionBegin;
2444 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2445 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2446 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2447 if (!row_identity || !col_identity) {
2448 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2449 }
2450
2451 process_group_type pg;
2452 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2453 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg,
petsc::matrix_distribution(A, pg)); 2454 lgraph_type& level_graph = *lgraph_p; 2455 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2456 2457 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2458 ilu_permuted(level_graph); 2459 2460 /* put together the new matrix */ 2461 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2462 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2463 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2464 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2465 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2466 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2467 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2468 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2469 2470 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2471 ierr = PetscContainerSetPointer(c, lgraph_p); 2472 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2473 ierr = PetscContainerDestroy(&c); 2474 PetscFunctionReturn(0); 2475 } 2476 2477 #undef __FUNCT__ 2478 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2479 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2480 { 2481 PetscFunctionBegin; 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "MatSolve_MPIAIJ" 2487 /* 2488 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2489 */ 2490 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2491 { 2492 namespace graph_dist = boost::graph::distributed; 2493 2494 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2495 lgraph_type* lgraph_p; 2496 PetscContainer c; 2497 PetscErrorCode ierr; 2498 2499 PetscFunctionBegin; 2500 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2501 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2502 ierr = VecCopy(b, x);CHKERRQ(ierr); 2503 2504 PetscScalar* array_x; 2505 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2506 PetscInt sx; 2507 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2508 2509 PetscScalar* array_b; 2510 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2511 PetscInt sb; 2512 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2513 2514 lgraph_type& level_graph = *lgraph_p; 2515 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2516 2517 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2518 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2519 ref_x(array_x, boost::extents[num_vertices(graph)]); 2520 2521 typedef boost::iterator_property_map<array_ref_type::iterator, 2522 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2523 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2524 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2525 2526 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2527 2528 PetscFunctionReturn(0); 2529 } 2530 #endif 2531 2532 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2533 PetscInt nzlocal,nsends,nrecvs; 2534 PetscMPIInt *send_rank,*recv_rank; 2535 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2536 PetscScalar *sbuf_a,**rbuf_a; 2537 PetscErrorCode (*Destroy)(Mat); 2538 } Mat_Redundant; 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2542 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2543 { 2544 PetscErrorCode ierr; 2545 Mat_Redundant 
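/*
   Aside (editorial sketch): the Mat_Redundant bookkeeping below rides along with the
   matrix inside a PetscContainer. The attach/query lifecycle, shown here with a
   hypothetical payload struct and destroy routine, is:
*/
#if 0
  PetscContainer container;
  MyPayload      *payload;                                 /* hypothetical struct */
  ierr = PetscNew(MyPayload,&payload);CHKERRQ(ierr);
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,payload);CHKERRQ(ierr);
  ierr = PetscContainerSetUserDestroy(container,MyPayloadDestroy);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);  /* C now holds the only reference */
  /* later, from any routine handed C: */
  ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&payload);CHKERRQ(ierr);
#endif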
*redund=(Mat_Redundant*)ptr;
2546 PetscInt i;
2547
2548 PetscFunctionBegin;
2549 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2550 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2551 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2552 for (i=0; i<redund->nrecvs; i++){
2553 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2554 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2555 }
2556 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2557 ierr = PetscFree(redund);CHKERRQ(ierr);
2558 PetscFunctionReturn(0);
2559 }
2560
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatDestroy_MatRedundant"
2563 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2564 {
2565 PetscErrorCode ierr;
2566 PetscContainer container;
2567 Mat_Redundant *redund=PETSC_NULL;
2568
2569 PetscFunctionBegin;
2570 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2571 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2572 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2573 A->ops->destroy = redund->Destroy;
2574 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2575 if (A->ops->destroy) {
2576 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2577 }
2578 PetscFunctionReturn(0);
2579 }
2580
2581 #undef __FUNCT__
2582 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2583 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2584 {
2585 PetscMPIInt rank,size;
2586 MPI_Comm comm=((PetscObject)mat)->comm;
2587 PetscErrorCode ierr;
2588 PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
2589 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2590 PetscInt *rowrange=mat->rmap->range;
2591 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2592 Mat A=aij->A,B=aij->B,C=*matredundant;
2593 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2594 PetscScalar *sbuf_a;
2595 PetscInt nzlocal=a->nz+b->nz;
2596 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2597 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2598 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2599 MatScalar *aworkA,*aworkB;
2600 PetscScalar *vals;
2601 PetscMPIInt tag1,tag2,tag3,imdex;
2602 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2603 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2604 MPI_Status recv_status,*send_status;
2605 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2606 PetscInt **rbuf_j=PETSC_NULL;
2607 PetscScalar **rbuf_a=PETSC_NULL;
2608 Mat_Redundant *redund=PETSC_NULL;
2609 PetscContainer container;
2610
2611 PetscFunctionBegin;
2612 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2613 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2614
2615 if (reuse == MAT_REUSE_MATRIX) {
2616 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2617 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2618 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2619 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2620 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2621 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2622 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2623 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2624
2625 nsends = redund->nsends;
2626 nrecvs = redund->nrecvs;
2627 send_rank = redund->send_rank;
2628 recv_rank = redund->recv_rank;
2629 sbuf_nz = redund->sbuf_nz;
2630 rbuf_nz = redund->rbuf_nz;
2631 sbuf_j = redund->sbuf_j;
2632 sbuf_a = redund->sbuf_a;
2633 rbuf_j = redund->rbuf_j;
2634 rbuf_a = redund->rbuf_a;
2635 }
2636
2637 if (reuse == MAT_INITIAL_MATRIX){
2638 PetscMPIInt subrank,subsize;
2639 PetscInt nleftover,np_subcomm;
2640 /* get the destination processors' id send_rank, nsends and nrecvs */
2641 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2642 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2643 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2644 np_subcomm = size/nsubcomm;
2645 nleftover = size - nsubcomm*np_subcomm;
2646 nsends = 0; nrecvs = 0;
2647 for (i=0; i<size; i++){ /* i=rank*/
2648 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2649 send_rank[nsends] = i; nsends++;
2650 recv_rank[nrecvs++] = i;
2651 }
2652 }
2653 if (rank >= size - nleftover){/* this proc is a leftover processor */
2654 i = size-nleftover-1;
2655 j = 0;
2656 while (j < nsubcomm - nleftover){
2657 send_rank[nsends++] = i;
2658 i--; j++;
2659 }
2660 }
2661
2662 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2663 for (i=0; i<nleftover; i++){
2664 recv_rank[nrecvs++] = size-nleftover+i;
2665 }
2666 }
2667
2668 /* allocate sbuf_j, sbuf_a */
2669 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2670 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2671 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2672 } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2673
2674 /* copy mat's local entries into the buffers */
2675 if (reuse == MAT_INITIAL_MATRIX){
2676 rownz_max = 0;
2677 rptr = sbuf_j;
2678 cols = sbuf_j + rend-rstart + 1;
2679 vals = sbuf_a;
2680 rptr[0] = 0;
2681 for (i=0; i<rend-rstart; i++){
2682 row = i + rstart;
2683 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2684 ncols = nzA + nzB;
2685 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2686 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2687 /* load the column indices for this row into cols */
2688 lwrite = 0;
2689 for (l=0; l<nzB; l++) {
2690 if ((ctmp = bmap[cworkB[l]]) < cstart){
2691 vals[lwrite] = aworkB[l];
2692 cols[lwrite++] = ctmp;
2693 }
2694 }
2695 for (l=0; l<nzA; l++){
2696 vals[lwrite] = aworkA[l];
2697 cols[lwrite++] = cstart + cworkA[l];
2698 }
2699 for (l=0; l<nzB; l++) {
2700 if ((ctmp = bmap[cworkB[l]]) >= cend){
2701 vals[lwrite] = aworkB[l];
2702 cols[lwrite++] = ctmp;
2703 }
2704 }
2705 vals += ncols;
2706 cols += ncols;
2707 rptr[i+1] = rptr[i] + ncols;
2708 if (rownz_max < ncols) rownz_max = ncols;
2709 }
2710 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2711 } else { /* only copy matrix values into sbuf_a */
2712 rptr = sbuf_j;
2713 vals = sbuf_a;
2714 rptr[0] = 0;
2715 for (i=0; i<rend-rstart; i++){
2716 row = i +
rstart; 2717 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2718 ncols = nzA + nzB; 2719 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2720 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2721 lwrite = 0; 2722 for (l=0; l<nzB; l++) { 2723 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2724 } 2725 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2726 for (l=0; l<nzB; l++) { 2727 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2728 } 2729 vals += ncols; 2730 rptr[i+1] = rptr[i] + ncols; 2731 } 2732 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2733 2734 /* send nzlocal to others, and recv other's nzlocal */ 2735 /*--------------------------------------------------*/ 2736 if (reuse == MAT_INITIAL_MATRIX){ 2737 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2738 s_waits2 = s_waits3 + nsends; 2739 s_waits1 = s_waits2 + nsends; 2740 r_waits1 = s_waits1 + nsends; 2741 r_waits2 = r_waits1 + nrecvs; 2742 r_waits3 = r_waits2 + nrecvs; 2743 } else { 2744 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2745 r_waits3 = s_waits3 + nsends; 2746 } 2747 2748 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2749 if (reuse == MAT_INITIAL_MATRIX){ 2750 /* get new tags to keep the communication clean */ 2751 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2752 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2753 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2754 2755 /* post receives of other's nzlocal */ 2756 for (i=0; i<nrecvs; i++){ 2757 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2758 } 2759 /* send nzlocal to others */ 2760 for (i=0; i<nsends; i++){ 2761 sbuf_nz[i] = nzlocal; 2762 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2763 } 2764 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2765 count = nrecvs; 2766 while (count) { 2767 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2768 recv_rank[imdex] = recv_status.MPI_SOURCE; 2769 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2770 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2771 2772 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2773 rbuf_nz[imdex] += i + 2; 2774 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2775 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2776 count--; 2777 } 2778 /* wait on sends of nzlocal */ 2779 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2780 /* send mat->i,j to others, and recv from other's */ 2781 /*------------------------------------------------*/ 2782 for (i=0; i<nsends; i++){ 2783 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2784 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2785 } 2786 /* wait on receives of mat->i,j */ 2787 /*------------------------------*/ 2788 count = nrecvs; 2789 while (count) { 2790 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2791 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2792 count--; 2793 } 2794 /* wait on sends of mat->i,j */ 2795 /*---------------------------*/ 2796 if (nsends) { 2797 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2798 } 2799 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2800 2801 /* post receives, send and receive mat->a */ 2802 /*----------------------------------------*/ 2803 for (imdex=0; imdex<nrecvs; imdex++) { 2804 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2805 } 2806 for (i=0; i<nsends; i++){ 2807 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2808 } 2809 count = nrecvs; 2810 while (count) { 2811 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2812 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2813 count--; 2814 } 2815 if (nsends) { 2816 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2817 } 2818 2819 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2820 2821 /* create redundant matrix */ 2822 /*-------------------------*/ 2823 if (reuse == MAT_INITIAL_MATRIX){ 2824 /* compute rownz_max for preallocation */ 2825 for (imdex=0; imdex<nrecvs; imdex++){ 2826 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2827 rptr = rbuf_j[imdex]; 2828 for (i=0; i<j; i++){ 2829 ncols = rptr[i+1] - rptr[i]; 2830 if (rownz_max < ncols) rownz_max = ncols; 2831 } 2832 } 2833 2834 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2835 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2836 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2837 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2838 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2839 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2840 } else { 2841 C = *matredundant; 2842 } 2843 2844 /* insert local matrix entries */ 2845 rptr = sbuf_j; 2846 cols = sbuf_j + rend-rstart + 1; 2847 vals = sbuf_a; 2848 for (i=0; i<rend-rstart; i++){ 2849 row = i + rstart; 2850 ncols = rptr[i+1] - rptr[i]; 2851 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2852 vals += ncols; 2853 cols += ncols; 2854 } 2855 /* insert received matrix entries */ 2856 for (imdex=0; imdex<nrecvs; imdex++){ 2857 rstart = rowrange[recv_rank[imdex]]; 2858 rend = rowrange[recv_rank[imdex]+1]; 2859 rptr = rbuf_j[imdex]; 2860 cols = rbuf_j[imdex] + rend-rstart + 1; 2861 vals = rbuf_a[imdex]; 2862 for (i=0; i<rend-rstart; i++){ 2863 row = i + rstart; 2864 ncols = rptr[i+1] - rptr[i]; 2865 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2866 vals += ncols; 2867 cols += ncols; 2868 } 2869 } 2870 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2871 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2872 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2873 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2874 if (reuse == MAT_INITIAL_MATRIX) { 2875 PetscContainer container; 2876 *matredundant = C; 2877 /* create a supporting struct and attach it to C for reuse */ 2878 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2879 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
2880 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2881 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2882 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2883 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2884
2885 redund->nzlocal = nzlocal;
2886 redund->nsends = nsends;
2887 redund->nrecvs = nrecvs;
2888 redund->send_rank = send_rank;
2889 redund->recv_rank = recv_rank;
2890 redund->sbuf_nz = sbuf_nz;
2891 redund->rbuf_nz = rbuf_nz;
2892 redund->sbuf_j = sbuf_j;
2893 redund->sbuf_a = sbuf_a;
2894 redund->rbuf_j = rbuf_j;
2895 redund->rbuf_a = rbuf_a;
2896
2897 redund->Destroy = C->ops->destroy;
2898 C->ops->destroy = MatDestroy_MatRedundant;
2899 }
2900 PetscFunctionReturn(0);
2901 }
2902
2903 #undef __FUNCT__
2904 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2905 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2906 {
2907 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2908 PetscErrorCode ierr;
2909 PetscInt i,*idxb = 0;
2910 PetscScalar *va,*vb;
2911 Vec vtmp;
2912
2913 PetscFunctionBegin;
2914 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2915 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2916 if (idx) {
2917 for (i=0; i<A->rmap->n; i++) {
2918 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2919 }
2920 }
2921
2922 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2923 if (idx) {
2924 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2925 }
2926 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2927 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2928
2929 for (i=0; i<A->rmap->n; i++){
2930 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2931 va[i] = vb[i];
2932 if (idx) idx[i] = a->garray[idxb[i]];
2933 }
2934 }
2935
2936 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2937 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2938 ierr = PetscFree(idxb);CHKERRQ(ierr);
2939 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2940 PetscFunctionReturn(0);
2941 }
2942
2943 #undef __FUNCT__
2944 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2945 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2946 {
2947 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2948 PetscErrorCode ierr;
2949 PetscInt i,*idxb = 0;
2950 PetscScalar *va,*vb;
2951 Vec vtmp;
2952
2953 PetscFunctionBegin;
2954 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2955 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2956 if (idx) {
2957 for (i=0; i<A->rmap->n; i++) {
2958 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2959 }
2960 }
2961
2962 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2963 if (idx) {
2964 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2965 }
2966 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2967 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2968
2969 for (i=0; i<A->rmap->n; i++){
2970 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2971 va[i] = vb[i];
2972 if (idx) idx[i] = a->garray[idxb[i]];
2973 }
2974 }
2975
2976 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2977 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2978 ierr = PetscFree(idxb);CHKERRQ(ierr);
2979 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2980 PetscFunctionReturn(0);
2981 }
2982
2983 #undef __FUNCT__
2984 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2985 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2986 {
2987 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2988 PetscInt n = A->rmap->n;
2989 PetscInt cstart =
A->cmap->rstart; 2990 PetscInt *cmap = mat->garray; 2991 PetscInt *diagIdx, *offdiagIdx; 2992 Vec diagV, offdiagV; 2993 PetscScalar *a, *diagA, *offdiagA; 2994 PetscInt r; 2995 PetscErrorCode ierr; 2996 2997 PetscFunctionBegin; 2998 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2999 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 3000 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 3001 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3002 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3003 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3004 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3005 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3006 for (r = 0; r < n; ++r) { 3007 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3008 a[r] = diagA[r]; 3009 idx[r] = cstart + diagIdx[r]; 3010 } else { 3011 a[r] = offdiagA[r]; 3012 idx[r] = cmap[offdiagIdx[r]]; 3013 } 3014 } 3015 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3016 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3017 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3018 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3019 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3020 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3021 PetscFunctionReturn(0); 3022 } 3023 3024 #undef __FUNCT__ 3025 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3026 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3027 { 3028 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 3029 PetscInt n = A->rmap->n; 3030 PetscInt cstart = A->cmap->rstart; 3031 PetscInt *cmap = mat->garray; 3032 PetscInt *diagIdx, *offdiagIdx; 3033 Vec diagV, offdiagV; 3034 PetscScalar *a, *diagA, *offdiagA; 3035 PetscInt r; 3036 PetscErrorCode ierr; 3037 3038 PetscFunctionBegin; 3039 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3040 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 3041 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 3042 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3043 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3044 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3045 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3046 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3047 for (r = 0; r < n; ++r) { 3048 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3049 a[r] = diagA[r]; 3050 idx[r] = cstart + diagIdx[r]; 3051 } else { 3052 a[r] = offdiagA[r]; 3053 idx[r] = cmap[offdiagIdx[r]]; 3054 } 3055 } 3056 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3057 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3058 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3059 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3060 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3061 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3062 PetscFunctionReturn(0); 3063 } 3064 3065 #undef __FUNCT__ 3066 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3067 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3068 { 3069 PetscErrorCode ierr; 3070 Mat *dummy; 3071 3072 PetscFunctionBegin; 3073 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3074 *newmat = *dummy; 3075 ierr = PetscFree(dummy);CHKERRQ(ierr); 3076 PetscFunctionReturn(0); 3077 } 3078 3079 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3080 3081 #undef __FUNCT__ 
3082 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3083 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3084 { 3085 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3086 PetscErrorCode ierr; 3087 3088 PetscFunctionBegin; 3089 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3090 PetscFunctionReturn(0); 3091 } 3092 3093 #undef __FUNCT__ 3094 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3095 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3096 { 3097 PetscErrorCode ierr; 3098 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3099 3100 PetscFunctionBegin; 3101 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3102 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3103 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3104 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3105 PetscFunctionReturn(0); 3106 } 3107 3108 /* -------------------------------------------------------------------*/ 3109 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3110 MatGetRow_MPIAIJ, 3111 MatRestoreRow_MPIAIJ, 3112 MatMult_MPIAIJ, 3113 /* 4*/ MatMultAdd_MPIAIJ, 3114 MatMultTranspose_MPIAIJ, 3115 MatMultTransposeAdd_MPIAIJ, 3116 #ifdef PETSC_HAVE_PBGL 3117 MatSolve_MPIAIJ, 3118 #else 3119 0, 3120 #endif 3121 0, 3122 0, 3123 /*10*/ 0, 3124 0, 3125 0, 3126 MatSOR_MPIAIJ, 3127 MatTranspose_MPIAIJ, 3128 /*15*/ MatGetInfo_MPIAIJ, 3129 MatEqual_MPIAIJ, 3130 MatGetDiagonal_MPIAIJ, 3131 MatDiagonalScale_MPIAIJ, 3132 MatNorm_MPIAIJ, 3133 /*20*/ MatAssemblyBegin_MPIAIJ, 3134 MatAssemblyEnd_MPIAIJ, 3135 MatSetOption_MPIAIJ, 3136 MatZeroEntries_MPIAIJ, 3137 /*24*/ MatZeroRows_MPIAIJ, 3138 0, 3139 #ifdef PETSC_HAVE_PBGL 3140 0, 3141 #else 3142 0, 3143 #endif 3144 0, 3145 0, 3146 /*29*/ MatSetUp_MPIAIJ, 3147 #ifdef PETSC_HAVE_PBGL 3148 0, 3149 #else 3150 0, 3151 #endif 3152 0, 3153 0, 3154 0, 3155 /*34*/ MatDuplicate_MPIAIJ, 3156 0, 3157 0, 3158 0, 3159 0, 3160 /*39*/ MatAXPY_MPIAIJ, 3161 MatGetSubMatrices_MPIAIJ, 3162 MatIncreaseOverlap_MPIAIJ, 3163 MatGetValues_MPIAIJ, 3164 MatCopy_MPIAIJ, 3165 /*44*/ MatGetRowMax_MPIAIJ, 3166 MatScale_MPIAIJ, 3167 0, 3168 0, 3169 MatZeroRowsColumns_MPIAIJ, 3170 /*49*/ MatSetRandom_MPIAIJ, 3171 0, 3172 0, 3173 0, 3174 0, 3175 /*54*/ MatFDColoringCreate_MPIAIJ, 3176 0, 3177 MatSetUnfactored_MPIAIJ, 3178 MatPermute_MPIAIJ, 3179 0, 3180 /*59*/ MatGetSubMatrix_MPIAIJ, 3181 MatDestroy_MPIAIJ, 3182 MatView_MPIAIJ, 3183 0, 3184 0, 3185 /*64*/ 0, 3186 0, 3187 0, 3188 0, 3189 0, 3190 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3191 MatGetRowMinAbs_MPIAIJ, 3192 0, 3193 MatSetColoring_MPIAIJ, 3194 #if defined(PETSC_HAVE_ADIC) 3195 MatSetValuesAdic_MPIAIJ, 3196 #else 3197 0, 3198 #endif 3199 MatSetValuesAdifor_MPIAIJ, 3200 /*75*/ MatFDColoringApply_AIJ, 3201 0, 3202 0, 3203 0, 3204 MatFindZeroDiagonals_MPIAIJ, 3205 /*80*/ 0, 3206 0, 3207 0, 3208 /*83*/ MatLoad_MPIAIJ, 3209 0, 3210 0, 3211 0, 3212 0, 3213 0, 3214 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3215 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3216 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3217 MatPtAP_Basic, 3218 MatPtAPSymbolic_MPIAIJ, 3219 /*94*/ MatPtAPNumeric_MPIAIJ, 3220 0, 3221 0, 3222 0, 3223 0, 3224 /*99*/ 0, 3225 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3226 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3227 MatConjugate_MPIAIJ, 3228 0, 3229 /*104*/MatSetValuesRow_MPIAIJ, 3230 MatRealPart_MPIAIJ, 3231 MatImaginaryPart_MPIAIJ, 3232 0, 3233 0, 3234 /*109*/0, 3235 MatGetRedundantMatrix_MPIAIJ, 3236 MatGetRowMin_MPIAIJ, 3237 0, 3238 0, 3239 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3240 0, 3241 0, 3242 0, 3243 0, 3244 /*119*/0, 3245 0, 3246 0, 
3247 0, 3248 MatGetMultiProcBlock_MPIAIJ, 3249 /*124*/MatFindNonzeroRows_MPIAIJ, 3250 MatGetColumnNorms_MPIAIJ, 3251 MatInvertBlockDiagonal_MPIAIJ, 3252 0, 3253 MatGetSubMatricesParallel_MPIAIJ, 3254 /*129*/0, 3255 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3256 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3257 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3258 0, 3259 /*134*/0, 3260 0, 3261 0, 3262 0, 3263 0 3264 }; 3265 3266 /* ----------------------------------------------------------------------------------------*/ 3267 3268 EXTERN_C_BEGIN 3269 #undef __FUNCT__ 3270 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3271 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3272 { 3273 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3274 PetscErrorCode ierr; 3275 3276 PetscFunctionBegin; 3277 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3278 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3279 PetscFunctionReturn(0); 3280 } 3281 EXTERN_C_END 3282 3283 EXTERN_C_BEGIN 3284 #undef __FUNCT__ 3285 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3286 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3287 { 3288 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3289 PetscErrorCode ierr; 3290 3291 PetscFunctionBegin; 3292 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3293 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3294 PetscFunctionReturn(0); 3295 } 3296 EXTERN_C_END 3297 3298 EXTERN_C_BEGIN 3299 #undef __FUNCT__ 3300 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3301 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3302 { 3303 Mat_MPIAIJ *b; 3304 PetscErrorCode ierr; 3305 PetscInt i; 3306 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3307 3308 PetscFunctionBegin; 3309 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3310 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3311 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3312 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3313 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3314 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3315 3316 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3317 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3318 if (d_nnz) { 3319 for (i=0; i<B->rmap->n; i++) { 3320 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3321 } 3322 } 3323 if (o_nnz) { 3324 for (i=0; i<B->rmap->n; i++) { 3325 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3326 } 3327 } 3328 b = (Mat_MPIAIJ*)B->data; 3329 3330 if (!B->preallocated) { 3331 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3332 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3333 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3334 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3335 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3336 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3337 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3338 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3339 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3340 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3341 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3342 } 3343 3344 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3345 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3346 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3347 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3348 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3349 B->preallocated = PETSC_TRUE; 3350 PetscFunctionReturn(0); 3351 } 3352 EXTERN_C_END 3353 3354 #undef __FUNCT__ 3355 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3356 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3357 { 3358 Mat mat; 3359 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3360 PetscErrorCode ierr; 3361 3362 PetscFunctionBegin; 3363 *newmat = 0; 3364 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3365 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3366 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3367 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3368 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3369 a = (Mat_MPIAIJ*)mat->data; 3370 3371 mat->factortype = matin->factortype; 3372 mat->rmap->bs = matin->rmap->bs; 3373 mat->cmap->bs = matin->cmap->bs; 3374 mat->assembled = PETSC_TRUE; 3375 mat->insertmode = NOT_SET_VALUES; 3376 mat->preallocated = PETSC_TRUE; 3377 3378 a->size = oldmat->size; 3379 a->rank = oldmat->rank; 3380 a->donotstash = oldmat->donotstash; 3381 a->roworiented = oldmat->roworiented; 3382 a->rowindices = 0; 3383 a->rowvalues = 0; 3384 a->getrowactive = PETSC_FALSE; 3385 3386 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3387 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3388 3389 if (oldmat->colmap) { 3390 #if defined (PETSC_USE_CTABLE) 3391 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3392 #else 3393 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3394 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3395 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3396 #endif 3397 } else a->colmap = 0; 3398 if (oldmat->garray) { 3399 PetscInt len; 3400 len = oldmat->B->cmap->n; 3401 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3402 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3403 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3404 } else a->garray = 0; 3405 3406 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3407 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3408 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3409 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3410 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3411 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3412 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3413 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3414 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3415 *newmat = mat; 3416 PetscFunctionReturn(0); 3417 } 3418 3419 3420 3421 #undef __FUNCT__ 3422 #define __FUNCT__ "MatLoad_MPIAIJ" 3423 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3424 { 3425 PetscScalar *vals,*svals; 3426 MPI_Comm comm = ((PetscObject)viewer)->comm; 3427 PetscErrorCode ierr; 3428 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3429 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3430 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3431 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3432 PetscInt cend,cstart,n,*rowners,sizesset=1; 3433 int fd; 3434 PetscInt bs = 1; 3435 3436 PetscFunctionBegin; 3437 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3438 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3439 if (!rank) { 3440 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3441 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3442 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3443 } 3444 3445 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3446 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3447 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3448 3449 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3450 3451 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3452 M = header[1]; N = header[2]; 3453 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3454 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3455 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3456 3457 /* If global sizes are set, check if they are consistent with that given in the file */ 3458 if (sizesset) { 3459 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3460 } 3461 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3462 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3463 3464 /* determine ownership of all (block) rows */ 3465 if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3466 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3467 else m = newMat->rmap->n; /* Set by user */ 3468 3469 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3470 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3471 3472 /* First process needs enough room for process with most rows */ 3473 if (!rank) { 3474 mmax = rowners[1]; 3475 for (i=2; i<=size; i++) { 3476 
mmax = PetscMax(mmax, rowners[i]); 3477 } 3478 } else mmax = m; 3479 3480 rowners[0] = 0; 3481 for (i=2; i<=size; i++) { 3482 rowners[i] += rowners[i-1]; 3483 } 3484 rstart = rowners[rank]; 3485 rend = rowners[rank+1]; 3486 3487 /* distribute row lengths to all processors */ 3488 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3489 if (!rank) { 3490 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3491 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3492 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3493 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3494 for (j=0; j<m; j++) { 3495 procsnz[0] += ourlens[j]; 3496 } 3497 for (i=1; i<size; i++) { 3498 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3499 /* calculate the number of nonzeros on each processor */ 3500 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3501 procsnz[i] += rowlengths[j]; 3502 } 3503 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3504 } 3505 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3506 } else { 3507 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3508 } 3509 3510 if (!rank) { 3511 /* determine max buffer needed and allocate it */ 3512 maxnz = 0; 3513 for (i=0; i<size; i++) { 3514 maxnz = PetscMax(maxnz,procsnz[i]); 3515 } 3516 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3517 3518 /* read in my part of the matrix column indices */ 3519 nz = procsnz[0]; 3520 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3521 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3522 3523 /* read in every one elses and ship off */ 3524 for (i=1; i<size; i++) { 3525 nz = procsnz[i]; 3526 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3527 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3528 } 3529 ierr = PetscFree(cols);CHKERRQ(ierr); 3530 } else { 3531 /* determine buffer space needed for message */ 3532 nz = 0; 3533 for (i=0; i<m; i++) { 3534 nz += ourlens[i]; 3535 } 3536 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3537 3538 /* receive message of column indices*/ 3539 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3540 } 3541 3542 /* determine column ownership if matrix is not square */ 3543 if (N != M) { 3544 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3545 else n = newMat->cmap->n; 3546 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3547 cstart = cend - n; 3548 } else { 3549 cstart = rstart; 3550 cend = rend; 3551 n = cend - cstart; 3552 } 3553 3554 /* loop over local rows, determining number of off diagonal entries */ 3555 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3556 jj = 0; 3557 for (i=0; i<m; i++) { 3558 for (j=0; j<ourlens[i]; j++) { 3559 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3560 jj++; 3561 } 3562 } 3563 3564 for (i=0; i<m; i++) { 3565 ourlens[i] -= offlens[i]; 3566 } 3567 if (!sizesset) { 3568 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3569 } 3570 3571 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3572 3573 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3574 3575 for (i=0; i<m; i++) { 3576 ourlens[i] += offlens[i]; 3577 } 3578 3579 if (!rank) { 3580 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3581 3582 /* read in my part of the matrix numerical values */ 
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
    copy on each process, and then the final result obtained by concatenating those
    local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().

    Note: This requires a sequential iscol with all indices.
3668 */ 3669 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3670 { 3671 PetscErrorCode ierr; 3672 PetscMPIInt rank,size; 3673 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3674 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3675 PetscBool allcolumns, colflag; 3676 Mat M,Mreuse; 3677 MatScalar *vwork,*aa; 3678 MPI_Comm comm = ((PetscObject)mat)->comm; 3679 Mat_SeqAIJ *aij; 3680 3681 3682 PetscFunctionBegin; 3683 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3684 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3685 3686 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3687 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3688 if (colflag && ncol == mat->cmap->N){ 3689 allcolumns = PETSC_TRUE; 3690 } else { 3691 allcolumns = PETSC_FALSE; 3692 } 3693 if (call == MAT_REUSE_MATRIX) { 3694 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3695 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3696 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3697 } else { 3698 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3699 } 3700 3701 /* 3702 m - number of local rows 3703 n - number of columns (same on all processors) 3704 rstart - first row in new global matrix generated 3705 */ 3706 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3707 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3708 if (call == MAT_INITIAL_MATRIX) { 3709 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3710 ii = aij->i; 3711 jj = aij->j; 3712 3713 /* 3714 Determine the number of non-zeros in the diagonal and off-diagonal 3715 portions of the matrix in order to do correct preallocation 3716 */ 3717 3718 /* first get start and end of "diagonal" columns */ 3719 if (csize == PETSC_DECIDE) { 3720 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3721 if (mglobal == n) { /* square matrix */ 3722 nlocal = m; 3723 } else { 3724 nlocal = n/size + ((n % size) > rank); 3725 } 3726 } else { 3727 nlocal = csize; 3728 } 3729 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3730 rstart = rend - nlocal; 3731 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3732 3733 /* next, compute all the lengths */ 3734 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3735 olens = dlens + m; 3736 for (i=0; i<m; i++) { 3737 jend = ii[i+1] - ii[i]; 3738 olen = 0; 3739 dlen = 0; 3740 for (j=0; j<jend; j++) { 3741 if (*jj < rstart || *jj >= rend) olen++; 3742 else dlen++; 3743 jj++; 3744 } 3745 olens[i] = olen; 3746 dlens[i] = dlen; 3747 } 3748 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3749 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3750 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3751 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3752 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3753 ierr = PetscFree(dlens);CHKERRQ(ierr); 3754 } else { 3755 PetscInt ml,nl; 3756 3757 M = *newmat; 3758 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3759 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3760 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3761 /* 3762 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save the local submatrix so a later MAT_REUSE_MATRIX request can use it */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero.
   See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was,
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.
   The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
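
     As a minimal calling sketch (illustrative only; error checking, a PetscErrorCode ierr,
     and the matching call on the other process are omitted), process 0 of the two-process
     example above could build the matrix as:

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3};        /* row starts: row 0 has 1 nonzero, row 1 has 2 */
     PetscInt    j[] = {0,0,2};        /* global column index of each nonzero          */
     PetscScalar v[] = {1.0,2.0,3.0};  /* value of each nonzero                        */

     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve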
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm'. That is, each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0. Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.
   Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence preallocation is perfect.
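
   As a minimal calling sketch (illustrative only; error checking and a
   PetscErrorCode ierr are omitted), proc0 of the example above could create
   the matrix with its per-row counts as:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2};  /* nonzeros per local row in the DIAGONAL block [A]      */
     PetscInt o_nnz[] = {2,2,2};  /* nonzeros per local row in the OFF-DIAGONAL block [BC] */

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   proc1 and proc2 would make the same call with their own m, n, d_nnz, and
   o_nnz values from the tables above.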
4300 4301 Level: intermediate 4302 4303 .keywords: matrix, aij, compressed row, sparse, parallel 4304 4305 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4306 MPIAIJ, MatCreateMPIAIJWithArrays() 4307 @*/ 4308 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4309 { 4310 PetscErrorCode ierr; 4311 PetscMPIInt size; 4312 4313 PetscFunctionBegin; 4314 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4315 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4316 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4317 if (size > 1) { 4318 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4319 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4320 } else { 4321 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4322 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4323 } 4324 PetscFunctionReturn(0); 4325 } 4326 4327 #undef __FUNCT__ 4328 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4329 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4330 { 4331 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4332 4333 PetscFunctionBegin; 4334 *Ad = a->A; 4335 *Ao = a->B; 4336 *colmap = a->garray; 4337 PetscFunctionReturn(0); 4338 } 4339 4340 #undef __FUNCT__ 4341 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4342 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4343 { 4344 PetscErrorCode ierr; 4345 PetscInt i; 4346 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4347 4348 PetscFunctionBegin; 4349 if (coloring->ctype == IS_COLORING_GLOBAL) { 4350 ISColoringValue *allcolors,*colors; 4351 ISColoring ocoloring; 4352 4353 /* set coloring for diagonal portion */ 4354 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4355 4356 /* set coloring for off-diagonal portion */ 4357 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4358 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4359 for (i=0; i<a->B->cmap->n; i++) { 4360 colors[i] = allcolors[a->garray[i]]; 4361 } 4362 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4363 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4364 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4365 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4366 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4367 ISColoringValue *colors; 4368 PetscInt *larray; 4369 ISColoring ocoloring; 4370 4371 /* set coloring for diagonal portion */ 4372 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4373 for (i=0; i<a->A->cmap->n; i++) { 4374 larray[i] = i + A->cmap->rstart; 4375 } 4376 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4377 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4378 for (i=0; i<a->A->cmap->n; i++) { 4379 colors[i] = coloring->colors[larray[i]]; 4380 } 4381 ierr = PetscFree(larray);CHKERRQ(ierr); 4382 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4383 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4384 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4385 4386 /* set coloring for off-diagonal portion */ 4387 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4388 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4389 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4390 for (i=0; i<a->B->cmap->n; i++) { 4391 colors[i] = coloring->colors[larray[i]]; 4392 } 4393 ierr = PetscFree(larray);CHKERRQ(ierr); 4394 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4395 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4396 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4397 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4398 4399 PetscFunctionReturn(0); 4400 } 4401 4402 #if defined(PETSC_HAVE_ADIC) 4403 #undef __FUNCT__ 4404 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4405 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4406 { 4407 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4408 PetscErrorCode ierr; 4409 4410 PetscFunctionBegin; 4411 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4412 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4413 PetscFunctionReturn(0); 4414 } 4415 #endif 4416 4417 #undef __FUNCT__ 4418 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4419 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4420 { 4421 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4422 PetscErrorCode ierr; 4423 4424 PetscFunctionBegin; 4425 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4426 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4427 PetscFunctionReturn(0); 4428 } 4429 4430 #undef __FUNCT__ 4431 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4432 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4433 { 4434 PetscErrorCode ierr; 4435 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4436 PetscInt *indx; 4437 4438 PetscFunctionBegin; 4439 /* This routine will ONLY return MPIAIJ type matrix */ 4440 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4441 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4442 if (n == PETSC_DECIDE){ 4443 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4444 } 4445 /* Check sum(n) = N */ 4446 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4447 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4448 4449 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4450 rstart -= m; 4451 4452 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4453 for (i=0;i<m;i++) { 4454 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4455 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4456 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4457 } 4458 4459 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4460 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4461 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4462 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4463 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4464 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4465 PetscFunctionReturn(0); 4466 } 4467 4468 #undef __FUNCT__ 4469 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4470 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4471 { 4472 PetscErrorCode ierr; 4473 PetscInt 
m,N,i,rstart,nnz,Ii;
4474   PetscInt       *indx;
4475   PetscScalar    *values;
4476
4477   PetscFunctionBegin;
4478   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4479   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4480   for (i=0;i<m;i++) {
4481     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4482     Ii   = i + rstart;
4483     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4484     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4485   }
4486   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4487   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4488   PetscFunctionReturn(0);
4489 }
4490
4491 #undef __FUNCT__
4492 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4493 /*@
4494      MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4495                  matrices from each processor
4496
4497     Collective on MPI_Comm
4498
4499    Input Parameters:
4500 +    comm - the communicator the parallel matrix will live on
4501 .    inmat - the input sequential matrix
4502 .    n - number of local columns (or PETSC_DECIDE)
4503 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4504
4505    Output Parameter:
4506 .    outmat - the parallel matrix generated
4507
4508     Level: advanced
4509
4510    Notes: The number of columns of the matrix on EACH processor MUST be the same.
4511
4512 @*/
4513 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4514 {
4515   PetscErrorCode ierr;
4516
4517   PetscFunctionBegin;
4518   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4519   if (scall == MAT_INITIAL_MATRIX){
4520     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4521   }
4522   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4523   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4524   PetscFunctionReturn(0);
4525 }
4526
4527 #undef __FUNCT__
4528 #define __FUNCT__ "MatFileSplit"
4529 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4530 {
4531   PetscErrorCode    ierr;
4532   PetscMPIInt       rank;
4533   PetscInt          m,N,i,rstart,nnz;
4534   size_t            len;
4535   const PetscInt    *indx;
4536   PetscViewer       out;
4537   char              *name;
4538   Mat               B;
4539   const PetscScalar *values;
4540
4541   PetscFunctionBegin;
4542   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4543   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4544   /* Should this be the type of the diagonal block of A?
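     (Currently the local matrix B created below is hardwired to MATSEQAIJ,
     so any subclass used for the diagonal block of A is not preserved.)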
*/
4545   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4546   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4547   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4548   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4549   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4550   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4551   for (i=0;i<m;i++) {
4552     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4553     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4554     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4555   }
4556   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4557   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4558
4559   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4560   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4561   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4562   sprintf(name,"%s.%d",outfile,rank);
4563   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4564   ierr = PetscFree(name);CHKERRQ(ierr);
4565   ierr = MatView(B,out);CHKERRQ(ierr);
4566   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4567   ierr = MatDestroy(&B);CHKERRQ(ierr);
4568   PetscFunctionReturn(0);
4569 }
4570
4571 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4572 #undef __FUNCT__
4573 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4574 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4575 {
4576   PetscErrorCode      ierr;
4577   Mat_Merge_SeqsToMPI *merge;
4578   PetscContainer      container;
4579
4580   PetscFunctionBegin;
4581   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4582   if (container) {
4583     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4584     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4585     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4586     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4587     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4588     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4589     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4590     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4591     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4592     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4593     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4594     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4595     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4596     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4597     ierr = PetscFree(merge);CHKERRQ(ierr);
4598     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4599   }
4600   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4601   PetscFunctionReturn(0);
4602 }
4603
4604 #include <../src/mat/utils/freespace.h>
4605 #include <petscbt.h>
4606
4607 #undef __FUNCT__
4608 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4609 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4610 {
4611   PetscErrorCode      ierr;
4612   MPI_Comm            comm=((PetscObject)mpimat)->comm;
4613   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4614   PetscMPIInt         size,rank,taga,*len_s;
4615   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4616   PetscInt            proc,m;
4617   PetscInt            **buf_ri,**buf_rj;
4618   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4619   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4620   MPI_Request         *s_waits,*r_waits;
4621   MPI_Status          *status;
4622   MatScalar           *aa=a->a;
4623   MatScalar           **abuf_r,*ba_i;
4624   Mat_Merge_SeqsToMPI *merge;
4625   PetscContainer      container;
4626
4627   PetscFunctionBegin;
4628   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4629
4630   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4631   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4632
4633   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4634   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4635
4636   bi     = merge->bi;
4637   bj     = merge->bj;
4638   buf_ri = merge->buf_ri;
4639   buf_rj = merge->buf_rj;
4640
4641   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4642   owners = merge->rowmap->range;
4643   len_s  = merge->len_s;
4644
4645   /* send and recv matrix values */
4646   /*-----------------------------*/
4647   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4648   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4649
4650   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4651   for (proc=0,k=0; proc<size; proc++){
4652     if (!len_s[proc]) continue;
4653     i    = owners[proc];
4654     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4655     k++;
4656   }
4657
4658   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4659   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4660   ierr = PetscFree(status);CHKERRQ(ierr);
4661
4662   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4663   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4664
4665   /* insert mat values of mpimat */
4666   /*----------------------------*/
4667   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4668   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4669
4670   for (k=0; k<merge->nrecv; k++){
4671     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4672     nrows       = *(buf_ri_k[k]);
4673     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4674     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4675   }
4676
4677   /* set values of ba */
4678   m = merge->rowmap->n;
4679   for (i=0; i<m; i++) {
4680     arow = owners[rank] + i;
4681     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4682     bnzi = bi[i+1] - bi[i];
4683     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4684
4685     /* add local non-zero vals of this proc's seqmat into ba */
4686     anzi   = ai[arow+1] - ai[arow];
4687     aj     = a->j + ai[arow];
4688     aa     = a->a + ai[arow];
4689     nextaj = 0;
4690     for (j=0; nextaj<anzi; j++){
4691       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4692         ba_i[j] += aa[nextaj++];
4693       }
4694     }
4695
4696     /* add received vals into ba */
4697     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4698       /* i-th row */
4699       if (i == *nextrow[k]) {
4700         anzi   = *(nextai[k]+1) - *nextai[k];
4701         aj     = buf_rj[k] + *(nextai[k]);
4702         aa     = abuf_r[k] + *(nextai[k]);
4703         nextaj = 0;
4704         for (j=0; nextaj<anzi; j++){
4705           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4706             ba_i[j] += aa[nextaj++];
4707           }
4708         }
4709         nextrow[k]++; nextai[k]++;
4710       }
4711     }
4712     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4713   }
4714   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4715   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4716
4717   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4718   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4719   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4720   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4721   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4722   PetscFunctionReturn(0);
4723 }
4724
4725 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4726
4727 #undef __FUNCT__
4728 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4729 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4730 {
4731   PetscErrorCode      ierr;
4732   Mat                 B_mpi;
4733   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4734   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4735   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4736   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4737   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4738   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4739   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4740   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4741   MPI_Status          *status;
4742   PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
4743   PetscBT             lnkbt;
4744   Mat_Merge_SeqsToMPI *merge;
4745   PetscContainer      container;
4746
4747   PetscFunctionBegin;
4748   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4749
4750   /* make sure it is a PETSc comm */
4751   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4752   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4753   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4754
4755   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4756   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4757
4758   /* determine row ownership */
4759   /*---------------------------------------------------------*/
4760   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4761   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4762   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4763   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4764   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4765   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4766   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4767
4768   m      = merge->rowmap->n;
4769   M      = merge->rowmap->N;
4770   owners = merge->rowmap->range;
4771
4772   /* determine the number of messages to send, their lengths */
4773   /*---------------------------------------------------------*/
4774   len_s = merge->len_s;
4775
4776   len          = 0;  /* length of buf_si[] */
4777   merge->nsend = 0;
4778   for (proc=0; proc<size; proc++){
4779     len_si[proc] = 0;
4780     if (proc == rank){
4781       len_s[proc] = 0;
4782     } else {
4783       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4784       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4785     }
4786     if (len_s[proc]) {
4787       merge->nsend++;
4788       nrows = 0;
4789       for (i=owners[proc]; i<owners[proc+1]; i++){
4790         if (ai[i+1] > ai[i]) nrows++;
4791       }
4792       len_si[proc] = 2*(nrows+1);
4793       len         += len_si[proc];
4794     }
4795   }
4796
4797   /* determine the number and length of messages to receive for ij-structure */
4798   /*-------------------------------------------------------------------------*/
4799   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4800   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4801
4802   /* post the Irecv of j-structure */
4803   /*-------------------------------*/
4804   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4805   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4806
4807   /* post the Isend of j-structure */
4808   /*--------------------------------*/
4809   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4810
4811   for (proc=0, k=0; proc<size; proc++){
4812     if (!len_s[proc]) continue;
4813     i    = owners[proc];
4814     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4815     k++;
4816   }
4817
4818   /* receives and sends of j-structure are complete */
4819   /*------------------------------------------------*/
4820   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4821   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4822
4823   /* send and recv i-structure */
4824   /*---------------------------*/
4825   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4826   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4827
4828   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4829   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4830   for (proc=0,k=0; proc<size; proc++){
4831     if (!len_s[proc]) continue;
4832     /* form outgoing message for i-structure:
4833          buf_si[0]:                 nrows to be sent
4834                [1:nrows]:           row index (local on the receiving process)
4835                [nrows+1:2*nrows+1]: i-structure index
4836     */
4837     /*-------------------------------------------*/
4838     nrows       = len_si[proc]/2 - 1;
4839     buf_si_i    = buf_si + nrows+1;
4840     buf_si[0]   = nrows;
4841     buf_si_i[0] = 0;
4842     nrows       = 0;
4843     for (i=owners[proc]; i<owners[proc+1]; i++){
4844       anzi = ai[i+1] - ai[i];
4845       if (anzi) {
4846         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4847         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4848         nrows++;
4849       }
4850     }
4851     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4852     k++;
4853     buf_si += len_si[proc];
4854   }
4855
4856   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4857   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4858
4859   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4860   for (i=0; i<merge->nrecv; i++){
4861     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4862   }
4863
4864   ierr = PetscFree(len_si);CHKERRQ(ierr);
4865   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4866   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4867   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4868   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4869   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4870   ierr = PetscFree(status);CHKERRQ(ierr);
4871
4872   /* compute a local seq matrix in each processor */
4873   /*----------------------------------------------*/
4874   /* allocate bi array and free space for accumulating nonzero column info */
4875   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4876   bi[0] = 0;
4877
4878   /* create and initialize a linked list */
4879   nlnk = N+1;
4880   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4881
4882   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4883   len  = 0;
4884   len  = ai[owners[rank+1]] - ai[owners[rank]];
4885   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4886   current_space = free_space;
4887
4888   /* determine symbolic info for each local row */
4889   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4890
4891   for (k=0; k<merge->nrecv; k++){
4892     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4893     nrows       = *buf_ri_k[k];
4894     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4895     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4896   }
4897
4898   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4899   len  = 0;
4900   for (i=0;i<m;i++) {
4901     bnzi = 0;
4902     /* add local non-zero cols of this proc's seqmat into lnk */
4903     arow = owners[rank] + i;
4904     anzi = ai[arow+1] - ai[arow];
4905     aj   = a->j + ai[arow];
4906     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4907     bnzi += nlnk;
4908     /* add received col data into lnk */
4909     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4910       if (i == *nextrow[k]) { /* i-th row */
4911         anzi = *(nextai[k]+1) - *nextai[k];
4912         aj   = buf_rj[k] + *nextai[k];
4913         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4914         bnzi += nlnk;
4915         nextrow[k]++; nextai[k]++;
4916       }
4917     }
4918     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4919
4920     /* if free space is not available, make more free space */
4921     if (current_space->local_remaining<bnzi) {
4922       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4923       nspacedouble++;
4924     }
4925     /* copy data into free space, then initialize lnk */
4926     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4927     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4928
4929     current_space->array           += bnzi;
4930     current_space->local_used      += bnzi;
4931     current_space->local_remaining -= bnzi;
4932
4933     bi[i+1] = bi[i] + bnzi;
4934   }
4935
4936   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4937
4938   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4939   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4940   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4941
4942   /* create symbolic parallel matrix B_mpi */
4943   /*---------------------------------------*/
4944   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4945   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4946   if (n==PETSC_DECIDE) {
4947     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4948   } else {
4949     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4950   }
4951   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4952   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4953   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4954   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4955   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4956
4957   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4958   B_mpi->assembled    = PETSC_FALSE;
4959   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4960   merge->bi           = bi;
4961   merge->bj           = bj;
4962   merge->buf_ri       = buf_ri;
4963   merge->buf_rj       = buf_rj;
4964   merge->coi          = PETSC_NULL;
4965   merge->coj          = PETSC_NULL;
4966   merge->owners_co    = PETSC_NULL;
4967
4968   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4969
4970   /* attach the supporting struct to B_mpi for reuse */
4971   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4972   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4973   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4974   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4975   *mpimat = B_mpi;
4976
4977   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4978   PetscFunctionReturn(0);
4979 }
4980
4981 #undef __FUNCT__
4982 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4983 /*@C
4984       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4985                  matrices from each processor
4986
4987     Collective on MPI_Comm
4988
4989    Input Parameters:
4990 +    comm - the communicator the parallel matrix will live on
4991 .    seqmat - the input sequential matrix
4992 .    m - number of local rows (or PETSC_DECIDE)
4993 .    n - number of local columns (or PETSC_DECIDE)
4994 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4995
4996    Output Parameter:
4997 .    mpimat - the parallel matrix generated
4998
4999     Level: advanced
5000
5001    Notes:
5002      The dimensions of the sequential matrix in each processor MUST be the same.
5003      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5004      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5005 @*/
5006 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5007 {
5008   PetscErrorCode ierr;
5009   PetscMPIInt    size;
5010
5011   PetscFunctionBegin;
5012   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5013   if (size == 1){
5014     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5015     if (scall == MAT_INITIAL_MATRIX){
5016       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5017     } else {
5018       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5019     }
5020     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5021     PetscFunctionReturn(0);
5022   }
5023   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5024   if (scall == MAT_INITIAL_MATRIX){
5025     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5026   }
5027   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5028   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5029   PetscFunctionReturn(0);
5030 }
5031
5032 #undef __FUNCT__
5033 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5034 /*@
5035      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5036           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5037           with MatGetSize()
5038
5039     Not Collective
5040
5041    Input Parameters:
5042 +    A - the matrix
5043 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5044
5045    Output Parameter:
5046 .    A_loc - the local sequential matrix generated
5047
5048     Level: developer
5049
5050 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5051
5052 @*/
5053 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5054 {
5055   PetscErrorCode ierr;
5056   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5057   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5058   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5059   MatScalar      *aa=a->a,*ba=b->a,*cam;
5060   PetscScalar    *ca;
5061   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5062   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5063   PetscBool      match;
5064
5065   PetscFunctionBegin;
5066   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5067   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5068   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5069   if (scall == MAT_INITIAL_MATRIX){
5070     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5071     ci[0] = 0;
5072     for (i=0; i<am; i++){
5073       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5074     }
5075     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5076     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5077     k    = 0;
5078     for (i=0; i<am; i++) {
5079       ncols_o = bi[i+1] - bi[i];
5080       ncols_d = ai[i+1] - ai[i];
5081       /* off-diagonal portion of A */
5082       for (jo=0; jo<ncols_o; jo++) {
5083         col = cmap[*bj];
5084         if (col >= cstart) break;
5085         cj[k]   = col; bj++;
5086         ca[k++] = *ba++;
5087       }
5088       /* diagonal portion of A */
5089       for (j=0; j<ncols_d; j++) {
5090         cj[k]   = cstart + *aj++;
5091         ca[k++] = *aa++;
5092       }
5093       /* off-diagonal portion of A */
5094       for (j=jo; j<ncols_o; j++) {
5095         cj[k]   = cmap[*bj++];
5096         ca[k++] = *ba++;
5097       }
5098     }
5099     /* put together the new matrix */
5100     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5101     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5102     /* Since these are PETSc arrays, change flags to free them as necessary. */
5103     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5104     mat->free_a  = PETSC_TRUE;
5105     mat->free_ij = PETSC_TRUE;
5106     mat->nonew   = 0;
5107   } else if (scall == MAT_REUSE_MATRIX){
5108     mat = (Mat_SeqAIJ*)(*A_loc)->data;
5109     ci = mat->i; cj = mat->j; cam = mat->a;
5110     for (i=0; i<am; i++) {
5111       /* off-diagonal portion of A */
5112       ncols_o = bi[i+1] - bi[i];
5113       for (jo=0; jo<ncols_o; jo++) {
5114         col = cmap[*bj];
5115         if (col >= cstart) break;
5116         *cam++ = *ba++; bj++;
5117       }
5118       /* diagonal portion of A */
5119       ncols_d = ai[i+1] - ai[i];
5120       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5121       /* off-diagonal portion of A */
5122       for (j=jo; j<ncols_o; j++) {
5123         *cam++ = *ba++; bj++;
5124       }
5125     }
5126   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5127   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5128   PetscFunctionReturn(0);
5129 }
5130
5131 #undef __FUNCT__
5132 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5133 /*@C
5134      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5135
5136     Not Collective
5137
5138    Input Parameters:
5139 +    A - the matrix
5140 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5141 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5142
5143    Output Parameter:
5144 .    A_loc - the local sequential matrix generated
5145
5146     Level: developer
5147
5148 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5149
5150 @*/
5151 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5152 {
5153   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5154   PetscErrorCode ierr;
5155   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5156   IS             isrowa,iscola;
5157   Mat            *aloc;
5158   PetscBool      match;
5159
5160   PetscFunctionBegin;
5161   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5162   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5163   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5164   if (!row){
5165     start = A->rmap->rstart; end = A->rmap->rend;
5166     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5167   } else {
5168     isrowa = *row;
5169   }
5170   if (!col){
5171     start = A->cmap->rstart;
5172     cmap  = a->garray;
5173     nzA   = a->A->cmap->n;
5174     nzB   = a->B->cmap->n;
5175     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5176     ncols = 0;
5177     for (i=0; i<nzB; i++) {
5178       if (cmap[i] < start) idx[ncols++] = cmap[i];
5179       else break;
5180     }
5181     imark = i;
5182     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5183     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5184     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5185   } else {
5186     iscola = *col;
5187   }
5188   if (scall != MAT_INITIAL_MATRIX){
5189     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5190     aloc[0] = *A_loc;
5191   }
5192   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5193   *A_loc = aloc[0];
5194   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5195   if (!row){
5196     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5197   }
5198   if (!col){
5199     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5200   }
5201   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5202   PetscFunctionReturn(0);
5203 }
5204
5205 #undef __FUNCT__
5206 #define __FUNCT__ "MatGetBrowsOfAcols"
5207 /*@C
5208     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5209
5210     Collective on Mat
5211
5212    Input Parameters:
5213 +    A,B - the matrices in mpiaij format
5214 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5215 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5216
5217    Output Parameter:
5218 +    rowb, colb - index sets of rows and columns of B to extract
5219 -    B_seq - the sequential matrix generated
5220
5221     Level: developer
5222
5223 @*/
5224 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5225 {
5226   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5227   PetscErrorCode ierr;
5228   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5229   IS             isrowb,iscolb;
5230   Mat            *bseq=PETSC_NULL;
5231
5232   PetscFunctionBegin;
5233   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5234     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5235   }
5236   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5237
5238   if (scall == MAT_INITIAL_MATRIX){
5239     start = A->cmap->rstart;
5240     cmap  = a->garray;
5241     nzA   = a->A->cmap->n;
5242     nzB   = a->B->cmap->n;
5243     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5244     ncols = 0;
5245     for (i=0; i<nzB; i++) {  /* row < local row index */
5246       if (cmap[i] < start) idx[ncols++] = cmap[i];
5247       else break;
5248     }
5249     imark = i;
5250     for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
5251     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5252     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5253     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5254   } else {
5255     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5256     isrowb = *rowb; iscolb = *colb;
5257     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5258     bseq[0] = *B_seq;
5259   }
5260   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5261   *B_seq = bseq[0];
5262   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5263   if (!rowb){
5264     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5265   } else {
5266     *rowb = isrowb;
5267   }
5268   if (!colb){
5269     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5270   } else {
5271     *colb = iscolb;
5272   }
5273   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5274   PetscFunctionReturn(0);
5275 }
5276
5277 #undef __FUNCT__
5278 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5279 /*
5280     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5281     of the OFF-DIAGONAL portion of local A
5282
5283     Collective on Mat
5284
5285    Input Parameters:
5286 +    A,B - the matrices in mpiaij format
5287 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5288
5289    Output Parameter:
5290 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5291 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5292 .
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5293 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5294 5295 Level: developer 5296 5297 */ 5298 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5299 { 5300 VecScatter_MPI_General *gen_to,*gen_from; 5301 PetscErrorCode ierr; 5302 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5303 Mat_SeqAIJ *b_oth; 5304 VecScatter ctx=a->Mvctx; 5305 MPI_Comm comm=((PetscObject)ctx)->comm; 5306 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5307 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5308 PetscScalar *rvalues,*svalues; 5309 MatScalar *b_otha,*bufa,*bufA; 5310 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5311 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5312 MPI_Status *sstatus,rstatus; 5313 PetscMPIInt jj; 5314 PetscInt *cols,sbs,rbs; 5315 PetscScalar *vals; 5316 5317 PetscFunctionBegin; 5318 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5319 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5320 } 5321 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5322 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5323 5324 gen_to = (VecScatter_MPI_General*)ctx->todata; 5325 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5326 rvalues = gen_from->values; /* holds the length of receiving row */ 5327 svalues = gen_to->values; /* holds the length of sending row */ 5328 nrecvs = gen_from->n; 5329 nsends = gen_to->n; 5330 5331 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5332 srow = gen_to->indices; /* local row index to be sent */ 5333 sstarts = gen_to->starts; 5334 sprocs = gen_to->procs; 5335 sstatus = gen_to->sstatus; 5336 sbs = gen_to->bs; 5337 rstarts = gen_from->starts; 5338 rprocs = gen_from->procs; 5339 rbs = gen_from->bs; 5340 5341 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5342 if (scall == MAT_INITIAL_MATRIX){ 5343 /* i-array */ 5344 /*---------*/ 5345 /* post receives */ 5346 for (i=0; i<nrecvs; i++){ 5347 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5348 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5349 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5350 } 5351 5352 /* pack the outgoing message */ 5353 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5354 sstartsj[0] = 0; rstartsj[0] = 0; 5355 len = 0; /* total length of j or a array to be sent */ 5356 k = 0; 5357 for (i=0; i<nsends; i++){ 5358 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5359 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5360 for (j=0; j<nrows; j++) { 5361 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5362 for (l=0; l<sbs; l++){ 5363 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5364 rowlen[j*sbs+l] = ncols; 5365 len += ncols; 5366 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5367 } 5368 k++; 5369 } 5370 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5371 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5372 } 5373 /* 
recvs and sends of i-array are completed */
5374     i = nrecvs;
5375     while (i--) {
5376       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5377     }
5378     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5379
5380     /* allocate buffers for sending j and a arrays */
5381     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5382     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5383
5384     /* create i-array of B_oth */
5385     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5386     b_othi[0] = 0;
5387     len = 0; /* total length of j or a array to be received */
5388     k   = 0;
5389     for (i=0; i<nrecvs; i++){
5390       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5391       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5392       for (j=0; j<nrows; j++) {
5393         b_othi[k+1] = b_othi[k] + rowlen[j];
5394         len += rowlen[j]; k++;
5395       }
5396       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5397     }
5398
5399     /* allocate space for j and a arrays of B_oth */
5400     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5401     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5402
5403     /* j-array */
5404     /*---------*/
5405     /* post receives of j-array */
5406     for (i=0; i<nrecvs; i++){
5407       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5408       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5409     }
5410
5411     /* pack the outgoing message j-array */
5412     k = 0;
5413     for (i=0; i<nsends; i++){
5414       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5415       bufJ  = bufj+sstartsj[i];
5416       for (j=0; j<nrows; j++) {
5417         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5418         for (ll=0; ll<sbs; ll++){
5419           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5420           for (l=0; l<ncols; l++){
5421             *bufJ++ = cols[l];
5422           }
5423           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5424         }
5425       }
5426       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5427     }
5428
5429     /* recvs and sends of j-array are completed */
5430     i = nrecvs;
5431     while (i--) {
5432       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5433     }
5434     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5435   } else if (scall == MAT_REUSE_MATRIX){
5436     sstartsj = *startsj_s;
5437     rstartsj = *startsj_r;
5438     bufa     = *bufa_ptr;
5439     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5440     b_otha   = b_oth->a;
5441   } else {
5442     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5443   }
5444
5445   /* a-array */
5446   /*---------*/
5447   /* post receives of a-array */
5448   for (i=0; i<nrecvs; i++){
5449     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5450     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5451   }
5452
5453   /* pack the outgoing message a-array */
5454   k = 0;
5455   for (i=0; i<nsends; i++){
5456     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5457     bufA  = bufa+sstartsj[i];
5458     for (j=0; j<nrows; j++) {
5459       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5460       for (ll=0; ll<sbs; ll++){
5461         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5462         for (l=0; l<ncols; l++){
5463           *bufA++ = vals[l];
5464         }
5465         ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5466 } 5467 } 5468 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5469 } 5470 /* recvs and sends of a-array are completed */ 5471 i = nrecvs; 5472 while (i--) { 5473 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5474 } 5475 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5476 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5477 5478 if (scall == MAT_INITIAL_MATRIX){ 5479 /* put together the new matrix */ 5480 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5481 5482 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5483 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5484 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5485 b_oth->free_a = PETSC_TRUE; 5486 b_oth->free_ij = PETSC_TRUE; 5487 b_oth->nonew = 0; 5488 5489 ierr = PetscFree(bufj);CHKERRQ(ierr); 5490 if (!startsj_s || !bufa_ptr){ 5491 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5492 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5493 } else { 5494 *startsj_s = sstartsj; 5495 *startsj_r = rstartsj; 5496 *bufa_ptr = bufa; 5497 } 5498 } 5499 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5500 PetscFunctionReturn(0); 5501 } 5502 5503 #undef __FUNCT__ 5504 #define __FUNCT__ "MatGetCommunicationStructs" 5505 /*@C 5506 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5507 5508 Not Collective 5509 5510 Input Parameters: 5511 . A - The matrix in mpiaij format 5512 5513 Output Parameter: 5514 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5515 . 
colmap - A map from global column index to local index into lvec 5516 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5517 5518 Level: developer 5519 5520 @*/ 5521 #if defined (PETSC_USE_CTABLE) 5522 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5523 #else 5524 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5525 #endif 5526 { 5527 Mat_MPIAIJ *a; 5528 5529 PetscFunctionBegin; 5530 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5531 PetscValidPointer(lvec, 2); 5532 PetscValidPointer(colmap, 3); 5533 PetscValidPointer(multScatter, 4); 5534 a = (Mat_MPIAIJ *) A->data; 5535 if (lvec) *lvec = a->lvec; 5536 if (colmap) *colmap = a->colmap; 5537 if (multScatter) *multScatter = a->Mvctx; 5538 PetscFunctionReturn(0); 5539 } 5540 5541 EXTERN_C_BEGIN 5542 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5543 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5544 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5545 EXTERN_C_END 5546 5547 #undef __FUNCT__ 5548 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5549 /* 5550 Computes (B'*A')' since computing B*A directly is untenable 5551 5552 n p p 5553 ( ) ( ) ( ) 5554 m ( A ) * n ( B ) = m ( C ) 5555 ( ) ( ) ( ) 5556 5557 */ 5558 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5559 { 5560 PetscErrorCode ierr; 5561 Mat At,Bt,Ct; 5562 5563 PetscFunctionBegin; 5564 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5565 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5566 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5567 ierr = MatDestroy(&At);CHKERRQ(ierr); 5568 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5569 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5570 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5571 PetscFunctionReturn(0); 5572 } 5573 5574 #undef __FUNCT__ 5575 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5576 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5577 { 5578 PetscErrorCode ierr; 5579 PetscInt m=A->rmap->n,n=B->cmap->n; 5580 Mat Cmat; 5581 5582 PetscFunctionBegin; 5583 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5584 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5585 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5586 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5587 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5588 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5589 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5590 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5591 *C = Cmat; 5592 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5593 PetscFunctionReturn(0); 5594 } 5595 5596 /* ----------------------------------------------------------------*/ 5597 #undef __FUNCT__ 5598 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5599 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5600 { 5601 PetscErrorCode ierr; 5602 5603 PetscFunctionBegin; 5604 if (scall == MAT_INITIAL_MATRIX){ 5605 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5606 } 5607 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5608 
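  /* the numeric phase is run unconditionally: with MAT_REUSE_MATRIX the C created by an earlier symbolic call is filled again */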
PetscFunctionReturn(0); 5609 } 5610 5611 EXTERN_C_BEGIN 5612 #if defined(PETSC_HAVE_MUMPS) 5613 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5614 #endif 5615 #if defined(PETSC_HAVE_PASTIX) 5616 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5617 #endif 5618 #if defined(PETSC_HAVE_SUPERLU_DIST) 5619 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5620 #endif 5621 #if defined(PETSC_HAVE_CLIQUE) 5622 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5623 #endif 5624 EXTERN_C_END 5625 5626 /*MC 5627 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5628 5629 Options Database Keys: 5630 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5631 5632 Level: beginner 5633 5634 .seealso: MatCreateAIJ() 5635 M*/ 5636 5637 EXTERN_C_BEGIN 5638 #undef __FUNCT__ 5639 #define __FUNCT__ "MatCreate_MPIAIJ" 5640 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5641 { 5642 Mat_MPIAIJ *b; 5643 PetscErrorCode ierr; 5644 PetscMPIInt size; 5645 5646 PetscFunctionBegin; 5647 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5648 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5649 B->data = (void*)b; 5650 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5651 B->assembled = PETSC_FALSE; 5652 B->insertmode = NOT_SET_VALUES; 5653 b->size = size; 5654 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5655 5656 /* build cache for off array entries formed */ 5657 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5658 b->donotstash = PETSC_FALSE; 5659 b->colmap = 0; 5660 b->garray = 0; 5661 b->roworiented = PETSC_TRUE; 5662 5663 /* stuff used for matrix vector multiply */ 5664 b->lvec = PETSC_NULL; 5665 b->Mvctx = PETSC_NULL; 5666 5667 /* stuff for MatGetRow() */ 5668 b->rowindices = 0; 5669 b->rowvalues = 0; 5670 b->getrowactive = PETSC_FALSE; 5671 5672 /* flexible pointer used in CUSP/CUSPARSE classes */ 5673 b->spptr = PETSC_NULL; 5674 5675 #if defined(PETSC_HAVE_MUMPS) 5676 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5677 "MatGetFactor_aij_mumps", 5678 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5679 #endif 5680 #if defined(PETSC_HAVE_PASTIX) 5681 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5682 "MatGetFactor_mpiaij_pastix", 5683 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5684 #endif 5685 #if defined(PETSC_HAVE_SUPERLU_DIST) 5686 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5687 "MatGetFactor_mpiaij_superlu_dist", 5688 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5689 #endif 5690 #if defined(PETSC_HAVE_CLIQUE) 5691 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5692 "MatGetFactor_aij_clique", 5693 MatGetFactor_aij_clique);CHKERRQ(ierr); 5694 #endif 5695 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5696 "MatStoreValues_MPIAIJ", 5697 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5698 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5699 "MatRetrieveValues_MPIAIJ", 5700 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5701 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5702 "MatGetDiagonalBlock_MPIAIJ", 5703 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5704 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5705 "MatIsTranspose_MPIAIJ", 5706 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5707 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5708 "MatMPIAIJSetPreallocation_MPIAIJ", 5709 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5710 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5711 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5712 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5713 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5714 "MatDiagonalScaleLocal_MPIAIJ", 5715 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5716 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5717 "MatConvert_MPIAIJ_MPIAIJPERM", 5718 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5719 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5720 "MatConvert_MPIAIJ_MPIAIJCRL", 5721 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5722 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5723 "MatConvert_MPIAIJ_MPISBAIJ", 5724 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5725 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5726 "MatMatMult_MPIDense_MPIAIJ", 5727 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5728 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5729 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5730 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5731 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5732 "MatMatMultNumeric_MPIDense_MPIAIJ", 5733 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5734 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5735 PetscFunctionReturn(0); 5736 } 5737 EXTERN_C_END 5738 5739 #undef __FUNCT__ 5740 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5741 /*@ 5742 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5743 and "off-diagonal" part of the matrix in CSR format. 5744 5745 Collective on MPI_Comm 5746 5747 Input Parameters: 5748 + comm - MPI communicator 5749 . m - number of local rows (Cannot be PETSC_DECIDE) 5750 . n - This value should be the same as the local size used in creating the 5751 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5752 calculated if N is given) For square matrices n is almost always m. 5753 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5754 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5755 . i - row indices for "diagonal" portion of matrix 5756 . j - column indices 5757 . a - matrix values 5758 . oi - row indices for "off-diagonal" portion of matrix 5759 . oj - column indices 5760 - oa - matrix values 5761 5762 Output Parameter: 5763 . mat - the matrix 5764 5765 Level: advanced 5766 5767 Notes: 5768 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5769 must free the arrays once the matrix has been destroyed and not before. 5770 5771 The i and j indices are 0 based 5772 5773 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5774 5775 This sets local rows and cannot be used to set off-processor values. 
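       As an illustration (a sketch only, not code from this file; it assumes the usual
       rank, ierr and mat declarations, and that the off-diagonal arrays use global column
       indices, since the off-diagonal block below is created with the full global column
       width), a 4x4 matrix split across two processes with two rows each could be set up as

.vb
   if (!rank) {  /* rows 0-1, diagonal block is columns 0-1:
                    row 0: A(0,0)=1 (diagonal), A(0,3)=2 (off-diagonal)
                    row 1: A(1,1)=3 (diagonal) */
     PetscInt    i[3]  = {0,1,2},  j[2]  = {0,1};   /* local column indices  */
     PetscScalar a[2]  = {1.0,3.0};
     PetscInt    oi[3] = {0,1,1},  oj[1] = {3};     /* global column indices */
     PetscScalar oa[1] = {2.0};
     ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,4,4,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
   } else {      /* rows 2-3 are set up analogously */
     ...
   }
.ve

       In real code the six arrays must outlive the matrix, since they are not copied.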
5776 5777 You cannot later use MatSetValues() to change values in this matrix. 5778 5779 .keywords: matrix, aij, compressed row, sparse, parallel 5780 5781 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5782 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5783 @*/ 5784 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5785 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5786 { 5787 PetscErrorCode ierr; 5788 Mat_MPIAIJ *maij; 5789 5790 PetscFunctionBegin; 5791 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5792 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5793 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5794 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5795 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5796 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5797 maij = (Mat_MPIAIJ*) (*mat)->data; 5798 maij->donotstash = PETSC_TRUE; 5799 (*mat)->preallocated = PETSC_TRUE; 5800 5801 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5802 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5803 5804 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5805 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5806 5807 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5808 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5809 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5810 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5811 5812 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5813 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5814 PetscFunctionReturn(0); 5815 } 5816 5817 /* 5818 Special version for direct calls from Fortran 5819 */ 5820 #include <petsc-private/fortranimpl.h> 5821 5822 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5823 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5824 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5825 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5826 #endif 5827 5828 /* Change these macros so can be used in void function */ 5829 #undef CHKERRQ 5830 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5831 #undef SETERRQ2 5832 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5833 #undef SETERRQ3 5834 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5835 #undef SETERRQ 5836 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5837 5838 EXTERN_C_BEGIN 5839 #undef __FUNCT__ 5840 #define __FUNCT__ "matsetvaluesmpiaij_" 5841 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5842 { 5843 Mat mat = *mmat; 5844 PetscInt m = *mm, n = *mn; 5845 InsertMode addv = *maddv; 5846 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5847 PetscScalar value; 5848 PetscErrorCode ierr; 5849 5850 MatCheckPreallocated(mat,1); 5851 if (mat->insertmode == NOT_SET_VALUES) { 5852 mat->insertmode = addv; 5853 } 5854 #if defined(PETSC_USE_DEBUG) 5855 else if (mat->insertmode != addv) { 5856 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5857 } 5858 #endif 5859 { 5860 PetscInt 
i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5861 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5862 PetscBool roworiented = aij->roworiented; 5863 5864 /* Some Variables required in the macro */ 5865 Mat A = aij->A; 5866 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5867 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5868 MatScalar *aa = a->a; 5869 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5870 Mat B = aij->B; 5871 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5872 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5873 MatScalar *ba = b->a; 5874 5875 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5876 PetscInt nonew = a->nonew; 5877 MatScalar *ap1,*ap2; 5878 5879 PetscFunctionBegin; 5880 for (i=0; i<m; i++) { 5881 if (im[i] < 0) continue; 5882 #if defined(PETSC_USE_DEBUG) 5883 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5884 #endif 5885 if (im[i] >= rstart && im[i] < rend) { 5886 row = im[i] - rstart; 5887 lastcol1 = -1; 5888 rp1 = aj + ai[row]; 5889 ap1 = aa + ai[row]; 5890 rmax1 = aimax[row]; 5891 nrow1 = ailen[row]; 5892 low1 = 0; 5893 high1 = nrow1; 5894 lastcol2 = -1; 5895 rp2 = bj + bi[row]; 5896 ap2 = ba + bi[row]; 5897 rmax2 = bimax[row]; 5898 nrow2 = bilen[row]; 5899 low2 = 0; 5900 high2 = nrow2; 5901 5902 for (j=0; j<n; j++) { 5903 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5904 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5905 if (in[j] >= cstart && in[j] < cend){ 5906 col = in[j] - cstart; 5907 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5908 } else if (in[j] < 0) continue; 5909 #if defined(PETSC_USE_DEBUG) 5910 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5911 #endif 5912 else { 5913 if (mat->was_assembled) { 5914 if (!aij->colmap) { 5915 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5916 } 5917 #if defined (PETSC_USE_CTABLE) 5918 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5919 col--; 5920 #else 5921 col = aij->colmap[in[j]] - 1; 5922 #endif 5923 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5924 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5925 col = in[j]; 5926 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5927 B = aij->B; 5928 b = (Mat_SeqAIJ*)B->data; 5929 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5930 rp2 = bj + bi[row]; 5931 ap2 = ba + bi[row]; 5932 rmax2 = bimax[row]; 5933 nrow2 = bilen[row]; 5934 low2 = 0; 5935 high2 = nrow2; 5936 bm = aij->B->rmap->n; 5937 ba = b->a; 5938 } 5939 } else col = in[j]; 5940 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5941 } 5942 } 5943 } else { 5944 if (!aij->donotstash) { 5945 if (roworiented) { 5946 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5947 } else { 5948 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5949 } 5950 } 5951 } 5952 }} 5953 PetscFunctionReturnVoid(); 5954 } 5955 EXTERN_C_END 5956 5957
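/*
    A Fortran caller invokes the routine above directly (bypassing the usual
    MatSetValues() dispatch); a sketch, assuming mat, row, col, value and ierr
    are declared with the matching Fortran types:

       call MatSetValuesMPIAIJ(mat,1,row,1,col,value,ADD_VALUES,ierr)

    With the default underscore-appending name convention the compiler turns
    this call into the matsetvaluesmpiaij_ symbol defined above; the macros at
    the top of this section cover the all-caps and no-underscore conventions.
*/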