1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j <nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for(j=0; j<na;j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 PetscInt i,n,*garray = aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr); 187 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 188 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 189 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 190 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 191 rowners[0] = 0; 192 for (i=2; i<=size; i++) { 193 rowners[i] += rowners[i-1]; 194 } 195 rstart = rowners[rank]; 196 rend = rowners[rank+1]; 197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 198 if (!rank) { 199 gmata = (Mat_SeqAIJ*) gmat->data; 200 /* send row lengths to all processors */ 201 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 202 for (i=1; i<size; i++) { 203 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 204 } 205 /* determine number diagonal and off-diagonal counts */ 206 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 207 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 208 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 209 jj = 0; 210 for (i=0; i<m; i++) { 211 for (j=0; j<dlens[i]; j++) { 212 if (gmata->j[jj] < rstart) ld[i]++; 213 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 214 jj++; 215 } 216 } 217 /* send column indices to other processes */ 218 for (i=1; i<size; i++) { 219 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 220 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 222 } 223 224 /* send numerical values to other processes */ 225 for (i=1; i<size; i++) { 226 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 227 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 228 } 229 gmataa = gmata->a; 230 gmataj = gmata->j; 231 232 } else { 233 /* receive row lengths */ 234 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 235 /* receive column indices */ 236 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 237 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 238 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 239 /* determine number diagonal and off-diagonal counts */ 240 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 241 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 242 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 243 jj = 0; 244 for (i=0; i<m; i++) { 245 for (j=0; j<dlens[i]; j++) { 246 if (gmataj[jj] < rstart) ld[i]++; 247 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 248 jj++; 249 } 250 } 251 /* receive numerical values */ 252 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 253 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 254 } 255 /* set preallocation */ 256 for (i=0; i<m; i++) { 257 dlens[i] -= olens[i]; 258 } 259 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 260 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 261 262 for (i=0; i<m; i++) { 263 dlens[i] += olens[i]; 264 } 265 cnt = 0; 266 for (i=0; i<m; i++) { 267 row = rstart + i; 268 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 269 cnt += dlens[i]; 270 } 271 if (rank) { 272 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 273 } 274 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 275 ierr = PetscFree(rowners);CHKERRQ(ierr); 276 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 277 *inmat = mat; 278 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 279 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 280 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 281 mat = *inmat; 282 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 283 if (!rank) { 284 /* send numerical values to other processes */ 285 gmata = (Mat_SeqAIJ*) gmat->data; 286 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 287 gmataa = gmata->a; 288 for (i=1; i<size; i++) { 289 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 290 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 291 } 292 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 293 } else { 294 /* receive numerical values from process 0*/ 295 nz = Ad->nz + Ao->nz; 296 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 297 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 298 } 299 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 300 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 301 ad = Ad->a; 302 ao = Ao->a; 303 if (mat->rmap->n) { 304 i = 0; 305 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 306 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 307 } 308 for (i=1; i<mat->rmap->n; i++) { 309 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 310 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 311 } 312 i--; 313 if (mat->rmap->n) { 314 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 315 } 316 if (rank) { 317 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 318 } 319 } 320 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 322 CHKMEMQ; 323 PetscFunctionReturn(0); 324 } 325 326 /* 327 Local utility routine that creates a mapping from the global column 328 number to the local number in the off-diagonal part of the local 329 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 330 a slightly higher hash table cost; without it it is not scalable (each processor 331 has an order N integer array but is fast to acess. 332 */ 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 336 { 337 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 338 PetscErrorCode ierr; 339 PetscInt n = aij->B->cmap->n,i; 340 341 PetscFunctionBegin; 342 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 343 #if defined (PETSC_USE_CTABLE) 344 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 345 for (i=0; i<n; i++){ 346 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 347 } 348 #else 349 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 350 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 351 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 352 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 353 #endif 354 PetscFunctionReturn(0); 355 } 356 357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 358 { \ 359 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 360 lastcol1 = col;\ 361 while (high1-low1 > 5) { \ 362 t = (low1+high1)/2; \ 363 if (rp1[t] > col) high1 = t; \ 364 else low1 = t; \ 365 } \ 366 for (_i=low1; _i<high1; _i++) { \ 367 if (rp1[_i] > col) break; \ 368 if (rp1[_i] == col) { \ 369 if (addv == ADD_VALUES) ap1[_i] += value; \ 370 else ap1[_i] = value; \ 371 goto a_noinsert; \ 372 } \ 373 } \ 374 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 375 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 376 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 377 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 378 N = nrow1++ - 1; a->nz++; high1++; \ 379 /* shift up all the later entries in this row */ \ 380 for (ii=N; ii>=_i; ii--) { \ 381 rp1[ii+1] = rp1[ii]; \ 382 ap1[ii+1] = ap1[ii]; \ 383 } \ 384 rp1[_i] = col; \ 385 ap1[_i] = value; \ 386 a_noinsert: ; \ 387 ailen[row] = nrow1; \ 388 } 389 390 391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 392 { \ 393 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 394 lastcol2 = col;\ 395 while (high2-low2 > 5) { \ 396 t = (low2+high2)/2; \ 397 if (rp2[t] > col) high2 = t; \ 398 else low2 = t; \ 399 } \ 400 for (_i=low2; _i<high2; _i++) { \ 401 if (rp2[_i] > col) break; \ 402 if (rp2[_i] == col) { \ 403 if (addv == ADD_VALUES) ap2[_i] += value; \ 404 else ap2[_i] = value; \ 405 goto b_noinsert; \ 406 } \ 407 } \ 408 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 409 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 410 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 411 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 412 N = nrow2++ - 1; b->nz++; high2++; \ 413 /* shift up all the later entries in this row */ \ 414 for (ii=N; ii>=_i; ii--) { \ 415 rp2[ii+1] = rp2[ii]; \ 416 ap2[ii+1] = ap2[ii]; \ 417 } \ 418 rp2[_i] = col; \ 419 ap2[_i] = value; \ 420 b_noinsert: ; \ 421 bilen[row] = nrow2; \ 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 427 { 428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 430 PetscErrorCode ierr; 431 PetscInt l,*garray = mat->garray,diag; 432 433 PetscFunctionBegin; 434 /* code only works for square matrices A */ 435 436 /* find size of row to the left of the diagonal part */ 437 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 438 row = row - diag; 439 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 440 if (garray[b->j[b->i[row]+l]] > diag) break; 441 } 442 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 443 444 /* diagonal part */ 445 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 446 447 /* right of diagonal part */ 448 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValues_MPIAIJ" 454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscScalar value; 458 PetscErrorCode ierr; 459 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 460 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 461 PetscBool roworiented = aij->roworiented; 462 463 /* Some Variables required in the macro */ 464 Mat A = aij->A; 465 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 466 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 467 MatScalar *aa = a->a; 468 PetscBool ignorezeroentries = a->ignorezeroentries; 469 Mat B = aij->B; 470 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 471 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 472 MatScalar *ba = b->a; 473 474 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 475 PetscInt nonew; 476 MatScalar *ap1,*ap2; 477 478 PetscFunctionBegin; 479 if (v) PetscValidScalarPointer(v,6); 480 for (i=0; i<m; i++) { 481 if (im[i] < 0) continue; 482 #if defined(PETSC_USE_DEBUG) 483 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 484 #endif 485 if (im[i] >= rstart && im[i] < rend) { 486 row = im[i] - rstart; 487 lastcol1 = -1; 488 rp1 = aj + ai[row]; 489 ap1 = aa + ai[row]; 490 rmax1 = aimax[row]; 491 nrow1 = ailen[row]; 492 low1 = 0; 493 high1 = nrow1; 494 lastcol2 = -1; 495 rp2 = bj + bi[row]; 496 ap2 = ba + bi[row]; 497 rmax2 = bimax[row]; 498 nrow2 = bilen[row]; 499 low2 = 0; 500 high2 = nrow2; 501 502 for (j=0; j<n; j++) { 503 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 504 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 505 if (in[j] >= cstart && in[j] < cend){ 506 col = in[j] - cstart; 507 nonew = a->nonew; 508 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 509 } else if (in[j] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 512 #endif 513 else { 514 if (mat->was_assembled) { 515 if (!aij->colmap) { 516 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 517 } 518 #if defined (PETSC_USE_CTABLE) 519 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col--; 521 #else 522 col = aij->colmap[in[j]] - 1; 523 #endif 524 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 528 B = aij->B; 529 b = (Mat_SeqAIJ*)B->data; 530 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 531 rp2 = bj + bi[row]; 532 ap2 = ba + bi[row]; 533 rmax2 = bimax[row]; 534 nrow2 = bilen[row]; 535 low2 = 0; 536 high2 = nrow2; 537 bm = aij->B->rmap->n; 538 ba = b->a; 539 } 540 } else col = in[j]; 541 nonew = b->nonew; 542 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 543 } 544 } 545 } else { 546 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 547 if (!aij->donotstash) { 548 mat->assembled = PETSC_FALSE; 549 if (roworiented) { 550 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatGetValues_MPIAIJ" 562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 563 { 564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 565 PetscErrorCode ierr; 566 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 567 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 568 569 PetscFunctionBegin; 570 for (i=0; i<m; i++) { 571 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 572 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 573 if (idxm[i] >= rstart && idxm[i] < rend) { 574 row = idxm[i] - rstart; 575 for (j=0; j<n; j++) { 576 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 577 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 578 if (idxn[j] >= cstart && idxn[j] < cend){ 579 col = idxn[j] - cstart; 580 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } else { 582 if (!aij->colmap) { 583 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 584 } 585 #if defined (PETSC_USE_CTABLE) 586 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 587 col --; 588 #else 589 col = aij->colmap[idxn[j]] - 1; 590 #endif 591 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 592 else { 593 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 594 } 595 } 596 } 597 } else { 598 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 599 } 600 } 601 PetscFunctionReturn(0); 602 } 603 604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 609 { 610 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 611 PetscErrorCode ierr; 612 PetscInt nstash,reallocs; 613 InsertMode addv; 614 615 PetscFunctionBegin; 616 if (aij->donotstash || mat->nooffprocentries) { 617 PetscFunctionReturn(0); 618 } 619 620 /* make sure all processors are either in INSERTMODE or ADDMODE */ 621 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 622 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 623 mat->insertmode = addv; /* in case this processor had no cache */ 624 625 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 626 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 627 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 634 { 635 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 636 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 637 PetscErrorCode ierr; 638 PetscMPIInt n; 639 PetscInt i,j,rstart,ncols,flg; 640 PetscInt *row,*col; 641 PetscBool other_disassembled; 642 PetscScalar *val; 643 InsertMode addv = mat->insertmode; 644 645 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 646 PetscFunctionBegin; 647 if (!aij->donotstash && !mat->nooffprocentries) { 648 while (1) { 649 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 650 if (!flg) break; 651 652 for (i=0; i<n;) { 653 /* Now identify the consecutive vals belonging to the same row */ 654 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 655 if (j < n) ncols = j-i; 656 else ncols = n-i; 657 /* Now assemble all these values with a single function call */ 658 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 659 i = j; 660 } 661 } 662 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 663 } 664 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 665 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 666 667 /* determine if any processor has disassembled, if so we must 668 also disassemble ourselfs, in order that we may reassemble. */ 669 /* 670 if nonzero structure of submatrix B cannot change then we know that 671 no processor disassembled thus we can skip this stuff 672 */ 673 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 674 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 675 if (mat->was_assembled && !other_disassembled) { 676 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 677 } 678 } 679 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 680 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 681 } 682 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 683 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 684 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 685 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 686 687 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 688 aij->rowvalues = 0; 689 690 /* used by MatAXPY() */ 691 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 692 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 693 694 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 695 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 702 { 703 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 704 PetscErrorCode ierr; 705 706 PetscFunctionBegin; 707 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 708 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatZeroRows_MPIAIJ" 714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 715 { 716 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 717 PetscErrorCode ierr; 718 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 719 PetscInt i,*owners = A->rmap->range; 720 PetscInt *nprocs,j,idx,nsends,row; 721 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 722 PetscInt *rvalues,count,base,slen,*source; 723 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 724 MPI_Comm comm = ((PetscObject)A)->comm; 725 MPI_Request *send_waits,*recv_waits; 726 MPI_Status recv_status,*send_status; 727 const PetscScalar *xx; 728 PetscScalar *bb; 729 #if defined(PETSC_DEBUG) 730 PetscBool found = PETSC_FALSE; 731 #endif 732 733 PetscFunctionBegin; 734 /* first count number of contributors to each processor */ 735 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 736 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 737 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 738 j = 0; 739 for (i=0; i<N; i++) { 740 if (lastidx > (idx = rows[i])) j = 0; 741 lastidx = idx; 742 for (; j<size; j++) { 743 if (idx >= owners[j] && idx < owners[j+1]) { 744 nprocs[2*j]++; 745 nprocs[2*j+1] = 1; 746 owner[i] = j; 747 #if defined(PETSC_DEBUG) 748 found = PETSC_TRUE; 749 #endif 750 break; 751 } 752 } 753 #if defined(PETSC_DEBUG) 754 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 755 found = PETSC_FALSE; 756 #endif 757 } 758 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 759 760 if (A->nooffproczerorows) { 761 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 762 nrecvs = nsends; 763 nmax = N; 764 } else { 765 /* inform other processors of number of messages and max length*/ 766 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 767 } 768 769 /* post receives: */ 770 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 771 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 772 for (i=0; i<nrecvs; i++) { 773 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 774 } 775 776 /* do sends: 777 1) starts[i] gives the starting index in svalues for stuff going to 778 the ith processor 779 */ 780 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 781 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 782 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 783 starts[0] = 0; 784 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 785 for (i=0; i<N; i++) { 786 svalues[starts[owner[i]]++] = rows[i]; 787 } 788 789 starts[0] = 0; 790 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 791 count = 0; 792 for (i=0; i<size; i++) { 793 if (nprocs[2*i+1]) { 794 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 795 } 796 } 797 ierr = PetscFree(starts);CHKERRQ(ierr); 798 799 base = owners[rank]; 800 801 /* wait on receives */ 802 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 803 count = nrecvs; slen = 0; 804 while (count) { 805 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 806 /* unpack receives into our local space */ 807 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 808 source[imdex] = recv_status.MPI_SOURCE; 809 lens[imdex] = n; 810 slen += n; 811 count--; 812 } 813 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 814 815 /* move the data into the send scatter */ 816 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 817 count = 0; 818 for (i=0; i<nrecvs; i++) { 819 values = rvalues + i*nmax; 820 for (j=0; j<lens[i]; j++) { 821 lrows[count++] = values[j] - base; 822 } 823 } 824 ierr = PetscFree(rvalues);CHKERRQ(ierr); 825 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 826 ierr = PetscFree(owner);CHKERRQ(ierr); 827 ierr = PetscFree(nprocs);CHKERRQ(ierr); 828 829 /* fix right hand side if needed */ 830 if (x && b) { 831 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 833 for (i=0; i<slen; i++) { 834 bb[lrows[i]] = diag*xx[lrows[i]]; 835 } 836 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 837 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 838 } 839 /* 840 Zero the required rows. If the "diagonal block" of the matrix 841 is square and the user wishes to set the diagonal we use separate 842 code so that MatSetValues() is not called for each diagonal allocating 843 new memory, thus calling lots of mallocs and slowing things down. 844 845 */ 846 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 847 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 848 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 849 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 850 } else if (diag != 0.0) { 851 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 852 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 853 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 855 } 856 for (i = 0; i < slen; i++) { 857 row = lrows[i] + rstart; 858 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 859 } 860 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 861 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 862 } else { 863 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 864 } 865 ierr = PetscFree(lrows);CHKERRQ(ierr); 866 867 /* wait on sends */ 868 if (nsends) { 869 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 870 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 871 ierr = PetscFree(send_status);CHKERRQ(ierr); 872 } 873 ierr = PetscFree(send_waits);CHKERRQ(ierr); 874 ierr = PetscFree(svalues);CHKERRQ(ierr); 875 PetscFunctionReturn(0); 876 } 877 878 #undef __FUNCT__ 879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 881 { 882 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 883 PetscErrorCode ierr; 884 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 885 PetscInt i,*owners = A->rmap->range; 886 PetscInt *nprocs,j,idx,nsends; 887 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 888 PetscInt *rvalues,count,base,slen,*source; 889 PetscInt *lens,*lrows,*values,m; 890 MPI_Comm comm = ((PetscObject)A)->comm; 891 MPI_Request *send_waits,*recv_waits; 892 MPI_Status recv_status,*send_status; 893 const PetscScalar *xx; 894 PetscScalar *bb,*mask; 895 Vec xmask,lmask; 896 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 897 const PetscInt *aj, *ii,*ridx; 898 PetscScalar *aa; 899 #if defined(PETSC_DEBUG) 900 PetscBool found = PETSC_FALSE; 901 #endif 902 903 PetscFunctionBegin; 904 /* first count number of contributors to each processor */ 905 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 906 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 907 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 908 j = 0; 909 for (i=0; i<N; i++) { 910 if (lastidx > (idx = rows[i])) j = 0; 911 lastidx = idx; 912 for (; j<size; j++) { 913 if (idx >= owners[j] && idx < owners[j+1]) { 914 nprocs[2*j]++; 915 nprocs[2*j+1] = 1; 916 owner[i] = j; 917 #if defined(PETSC_DEBUG) 918 found = PETSC_TRUE; 919 #endif 920 break; 921 } 922 } 923 #if defined(PETSC_DEBUG) 924 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 925 found = PETSC_FALSE; 926 #endif 927 } 928 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 929 930 /* inform other processors of number of messages and max length*/ 931 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 932 933 /* post receives: */ 934 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 935 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 936 for (i=0; i<nrecvs; i++) { 937 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 938 } 939 940 /* do sends: 941 1) starts[i] gives the starting index in svalues for stuff going to 942 the ith processor 943 */ 944 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 945 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 946 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 947 starts[0] = 0; 948 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 949 for (i=0; i<N; i++) { 950 svalues[starts[owner[i]]++] = rows[i]; 951 } 952 953 starts[0] = 0; 954 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 955 count = 0; 956 for (i=0; i<size; i++) { 957 if (nprocs[2*i+1]) { 958 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 959 } 960 } 961 ierr = PetscFree(starts);CHKERRQ(ierr); 962 963 base = owners[rank]; 964 965 /* wait on receives */ 966 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 967 count = nrecvs; slen = 0; 968 while (count) { 969 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 970 /* unpack receives into our local space */ 971 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 972 source[imdex] = recv_status.MPI_SOURCE; 973 lens[imdex] = n; 974 slen += n; 975 count--; 976 } 977 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 978 979 /* move the data into the send scatter */ 980 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 981 count = 0; 982 for (i=0; i<nrecvs; i++) { 983 values = rvalues + i*nmax; 984 for (j=0; j<lens[i]; j++) { 985 lrows[count++] = values[j] - base; 986 } 987 } 988 ierr = PetscFree(rvalues);CHKERRQ(ierr); 989 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 990 ierr = PetscFree(owner);CHKERRQ(ierr); 991 ierr = PetscFree(nprocs);CHKERRQ(ierr); 992 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 993 994 /* zero diagonal part of matrix */ 995 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 996 997 /* handle off diagonal part of matrix */ 998 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 999 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1000 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1001 for (i=0; i<slen; i++) { 1002 bb[lrows[i]] = 1; 1003 } 1004 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1005 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1008 if (x) { 1009 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1012 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1013 } 1014 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1015 1016 /* remove zeroed rows of off diagonal matrix */ 1017 ii = aij->i; 1018 for (i=0; i<slen; i++) { 1019 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1020 } 1021 1022 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1023 if (aij->compressedrow.use){ 1024 m = aij->compressedrow.nrows; 1025 ii = aij->compressedrow.i; 1026 ridx = aij->compressedrow.rindex; 1027 for (i=0; i<m; i++){ 1028 n = ii[i+1] - ii[i]; 1029 aj = aij->j + ii[i]; 1030 aa = aij->a + ii[i]; 1031 1032 for (j=0; j<n; j++) { 1033 if (PetscAbsScalar(mask[*aj])) { 1034 if (b) bb[*ridx] -= *aa*xx[*aj]; 1035 *aa = 0.0; 1036 } 1037 aa++; 1038 aj++; 1039 } 1040 ridx++; 1041 } 1042 } else { /* do not use compressed row format */ 1043 m = l->B->rmap->n; 1044 for (i=0; i<m; i++) { 1045 n = ii[i+1] - ii[i]; 1046 aj = aij->j + ii[i]; 1047 aa = aij->a + ii[i]; 1048 for (j=0; j<n; j++) { 1049 if (PetscAbsScalar(mask[*aj])) { 1050 if (b) bb[i] -= *aa*xx[*aj]; 1051 *aa = 0.0; 1052 } 1053 aa++; 1054 aj++; 1055 } 1056 } 1057 } 1058 if (x) { 1059 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1060 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1061 } 1062 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1063 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1064 ierr = PetscFree(lrows);CHKERRQ(ierr); 1065 1066 /* wait on sends */ 1067 if (nsends) { 1068 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1069 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1070 ierr = PetscFree(send_status);CHKERRQ(ierr); 1071 } 1072 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1073 ierr = PetscFree(svalues);CHKERRQ(ierr); 1074 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatMult_MPIAIJ" 1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1081 { 1082 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1083 PetscErrorCode ierr; 1084 PetscInt nt; 1085 1086 PetscFunctionBegin; 1087 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1088 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1089 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1091 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1092 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1099 { 1100 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1118 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 PetscBool merged; 1130 1131 PetscFunctionBegin; 1132 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1133 /* do nondiagonal part */ 1134 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1135 if (!merged) { 1136 /* send it on its way */ 1137 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 /* do local part */ 1139 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1140 /* receive remote parts: note this assumes the values are not actually */ 1141 /* added in yy until the next line, */ 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 } else { 1144 /* do local part */ 1145 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1146 /* send it on its way */ 1147 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148 /* values actually were received in the Begin() but we need to call this nop */ 1149 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1150 } 1151 PetscFunctionReturn(0); 1152 } 1153 1154 EXTERN_C_BEGIN 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1157 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1158 { 1159 MPI_Comm comm; 1160 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1161 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1162 IS Me,Notme; 1163 PetscErrorCode ierr; 1164 PetscInt M,N,first,last,*notme,i; 1165 PetscMPIInt size; 1166 1167 PetscFunctionBegin; 1168 1169 /* Easy test: symmetric diagonal block */ 1170 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1171 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1172 if (!*f) PetscFunctionReturn(0); 1173 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1175 if (size == 1) PetscFunctionReturn(0); 1176 1177 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1178 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1179 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1180 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1181 for (i=0; i<first; i++) notme[i] = i; 1182 for (i=last; i<M; i++) notme[i-last+first] = i; 1183 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1184 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1185 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1186 Aoff = Aoffs[0]; 1187 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1188 Boff = Boffs[0]; 1189 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1190 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1191 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1192 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1193 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1194 ierr = PetscFree(notme);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 EXTERN_C_END 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1202 { 1203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 /* do nondiagonal part */ 1208 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1209 /* send it on its way */ 1210 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1211 /* do local part */ 1212 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1213 /* receive remote parts */ 1214 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 /* 1219 This only works correctly for square matrices where the subblock A->A is the 1220 diagonal block 1221 */ 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1225 { 1226 PetscErrorCode ierr; 1227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1228 1229 PetscFunctionBegin; 1230 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1231 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1232 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatScale_MPIAIJ" 1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1239 { 1240 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1245 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatDestroy_MPIAIJ" 1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1252 { 1253 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 #if defined(PETSC_USE_LOG) 1258 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1259 #endif 1260 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1261 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1263 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1264 #if defined (PETSC_USE_CTABLE) 1265 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1266 #else 1267 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1268 #endif 1269 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1270 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1271 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1272 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1273 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1274 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1275 1276 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1291 { 1292 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1293 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1294 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1295 PetscErrorCode ierr; 1296 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1297 int fd; 1298 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1299 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1300 PetscScalar *column_values; 1301 PetscInt message_count,flowcontrolcount; 1302 FILE *file; 1303 1304 PetscFunctionBegin; 1305 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1306 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1307 nz = A->nz + B->nz; 1308 if (!rank) { 1309 header[0] = MAT_FILE_CLASSID; 1310 header[1] = mat->rmap->N; 1311 header[2] = mat->cmap->N; 1312 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1313 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1314 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1315 /* get largest number of rows any processor has */ 1316 rlen = mat->rmap->n; 1317 range = mat->rmap->range; 1318 for (i=1; i<size; i++) { 1319 rlen = PetscMax(rlen,range[i+1] - range[i]); 1320 } 1321 } else { 1322 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1323 rlen = mat->rmap->n; 1324 } 1325 1326 /* load up the local row counts */ 1327 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1328 for (i=0; i<mat->rmap->n; i++) { 1329 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1330 } 1331 1332 /* store the row lengths to the file */ 1333 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1334 if (!rank) { 1335 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1336 for (i=1; i<size; i++) { 1337 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1338 rlen = range[i+1] - range[i]; 1339 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1340 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1341 } 1342 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1343 } else { 1344 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1345 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1346 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1347 } 1348 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1349 1350 /* load up the local column indices */ 1351 nzmax = nz; /* )th processor needs space a largest processor needs */ 1352 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1353 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1354 cnt = 0; 1355 for (i=0; i<mat->rmap->n; i++) { 1356 for (j=B->i[i]; j<B->i[i+1]; j++) { 1357 if ( (col = garray[B->j[j]]) > cstart) break; 1358 column_indices[cnt++] = col; 1359 } 1360 for (k=A->i[i]; k<A->i[i+1]; k++) { 1361 column_indices[cnt++] = A->j[k] + cstart; 1362 } 1363 for (; j<B->i[i+1]; j++) { 1364 column_indices[cnt++] = garray[B->j[j]]; 1365 } 1366 } 1367 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1368 1369 /* store the column indices to the file */ 1370 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1371 if (!rank) { 1372 MPI_Status status; 1373 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1374 for (i=1; i<size; i++) { 1375 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1376 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1377 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1378 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1379 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1380 } 1381 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1382 } else { 1383 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1384 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1385 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1386 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1387 } 1388 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1389 1390 /* load up the local column values */ 1391 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1392 cnt = 0; 1393 for (i=0; i<mat->rmap->n; i++) { 1394 for (j=B->i[i]; j<B->i[i+1]; j++) { 1395 if ( garray[B->j[j]] > cstart) break; 1396 column_values[cnt++] = B->a[j]; 1397 } 1398 for (k=A->i[i]; k<A->i[i+1]; k++) { 1399 column_values[cnt++] = A->a[k]; 1400 } 1401 for (; j<B->i[i+1]; j++) { 1402 column_values[cnt++] = B->a[j]; 1403 } 1404 } 1405 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1406 1407 /* store the column values to the file */ 1408 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1409 if (!rank) { 1410 MPI_Status status; 1411 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1412 for (i=1; i<size; i++) { 1413 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1414 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1415 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1416 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1417 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1418 } 1419 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1420 } else { 1421 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1422 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1423 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1424 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1425 } 1426 ierr = PetscFree(column_values);CHKERRQ(ierr); 1427 1428 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1429 if (file) { 1430 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1431 } 1432 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1438 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1439 { 1440 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1441 PetscErrorCode ierr; 1442 PetscMPIInt rank = aij->rank,size = aij->size; 1443 PetscBool isdraw,iascii,isbinary; 1444 PetscViewer sviewer; 1445 PetscViewerFormat format; 1446 1447 PetscFunctionBegin; 1448 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1449 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1450 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1451 if (iascii) { 1452 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1453 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1454 MatInfo info; 1455 PetscBool inodes; 1456 1457 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1458 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1459 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1460 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1461 if (!inodes) { 1462 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1463 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1464 } else { 1465 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1466 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1467 } 1468 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1469 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1470 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1472 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1473 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1474 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1475 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1476 PetscFunctionReturn(0); 1477 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1478 PetscInt inodecount,inodelimit,*inodes; 1479 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1480 if (inodes) { 1481 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1482 } else { 1483 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1484 } 1485 PetscFunctionReturn(0); 1486 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1487 PetscFunctionReturn(0); 1488 } 1489 } else if (isbinary) { 1490 if (size == 1) { 1491 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1492 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1493 } else { 1494 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1495 } 1496 PetscFunctionReturn(0); 1497 } else if (isdraw) { 1498 PetscDraw draw; 1499 PetscBool isnull; 1500 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1501 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1502 } 1503 1504 if (size == 1) { 1505 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1506 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1507 } else { 1508 /* assemble the entire matrix onto first processor. */ 1509 Mat A; 1510 Mat_SeqAIJ *Aloc; 1511 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1512 MatScalar *a; 1513 1514 if (mat->rmap->N > 1024) { 1515 PetscBool flg = PETSC_FALSE; 1516 1517 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1518 if (!flg) { 1519 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1520 } 1521 } 1522 1523 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1524 if (!rank) { 1525 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1526 } else { 1527 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1528 } 1529 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1530 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1531 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1532 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1533 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1534 1535 /* copy over the A part */ 1536 Aloc = (Mat_SeqAIJ*)aij->A->data; 1537 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1538 row = mat->rmap->rstart; 1539 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1540 for (i=0; i<m; i++) { 1541 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1542 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1543 } 1544 aj = Aloc->j; 1545 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1546 1547 /* copy over the B part */ 1548 Aloc = (Mat_SeqAIJ*)aij->B->data; 1549 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1550 row = mat->rmap->rstart; 1551 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1552 ct = cols; 1553 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1554 for (i=0; i<m; i++) { 1555 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1556 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1557 } 1558 ierr = PetscFree(ct);CHKERRQ(ierr); 1559 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1560 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1561 /* 1562 Everyone has to call to draw the matrix since the graphics waits are 1563 synchronized across all processors that share the PetscDraw object 1564 */ 1565 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1566 if (!rank) { 1567 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1568 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1569 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1570 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1571 } 1572 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1573 ierr = MatDestroy(&A);CHKERRQ(ierr); 1574 } 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatView_MPIAIJ" 1580 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1581 { 1582 PetscErrorCode ierr; 1583 PetscBool iascii,isdraw,issocket,isbinary; 1584 1585 PetscFunctionBegin; 1586 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1587 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1588 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1589 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1590 if (iascii || isdraw || isbinary || issocket) { 1591 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1592 } else { 1593 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1594 } 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatSOR_MPIAIJ" 1600 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1601 { 1602 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1603 PetscErrorCode ierr; 1604 Vec bb1 = 0; 1605 PetscBool hasop; 1606 1607 PetscFunctionBegin; 1608 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1609 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1610 } 1611 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1618 if (flag & SOR_ZERO_INITIAL_GUESS) { 1619 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1620 its--; 1621 } 1622 1623 while (its--) { 1624 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1626 1627 /* update rhs: bb1 = bb - B*x */ 1628 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1629 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1630 1631 /* local sweep */ 1632 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1633 } 1634 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1635 if (flag & SOR_ZERO_INITIAL_GUESS) { 1636 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1637 its--; 1638 } 1639 while (its--) { 1640 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 1643 /* update rhs: bb1 = bb - B*x */ 1644 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1645 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1646 1647 /* local sweep */ 1648 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1649 } 1650 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1651 if (flag & SOR_ZERO_INITIAL_GUESS) { 1652 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1653 its--; 1654 } 1655 while (its--) { 1656 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1657 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1658 1659 /* update rhs: bb1 = bb - B*x */ 1660 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1661 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1662 1663 /* local sweep */ 1664 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1665 } 1666 } else if (flag & SOR_EISENSTAT) { 1667 Vec xx1; 1668 1669 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1670 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1671 1672 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 if (!mat->diag) { 1675 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1676 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1677 } 1678 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1679 if (hasop) { 1680 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1681 } else { 1682 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1683 } 1684 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1685 1686 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1687 1688 /* local sweep */ 1689 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1690 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1691 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1692 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1693 1694 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatPermute_MPIAIJ" 1700 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1701 { 1702 MPI_Comm comm; 1703 PetscInt first,local_rowsize,local_colsize; 1704 const PetscInt *rows; 1705 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1706 PetscErrorCode ierr; 1707 1708 PetscFunctionBegin; 1709 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1710 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1711 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1712 /* collect the global row permutation and invert it */ 1713 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1714 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1715 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1716 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1717 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1718 /* get the local target indices */ 1719 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1720 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1721 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1722 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1723 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1724 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1725 /* the column permutation is so much easier; 1726 make a local version of 'colp' and invert it */ 1727 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1728 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1729 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1730 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1731 /* now we just get the submatrix */ 1732 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1733 /* clean up */ 1734 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1735 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1736 PetscFunctionReturn(0); 1737 } 1738 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1742 { 1743 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1744 Mat A = mat->A,B = mat->B; 1745 PetscErrorCode ierr; 1746 PetscReal isend[5],irecv[5]; 1747 1748 PetscFunctionBegin; 1749 info->block_size = 1.0; 1750 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1751 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1752 isend[3] = info->memory; isend[4] = info->mallocs; 1753 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1754 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1755 isend[3] += info->memory; isend[4] += info->mallocs; 1756 if (flag == MAT_LOCAL) { 1757 info->nz_used = isend[0]; 1758 info->nz_allocated = isend[1]; 1759 info->nz_unneeded = isend[2]; 1760 info->memory = isend[3]; 1761 info->mallocs = isend[4]; 1762 } else if (flag == MAT_GLOBAL_MAX) { 1763 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1764 info->nz_used = irecv[0]; 1765 info->nz_allocated = irecv[1]; 1766 info->nz_unneeded = irecv[2]; 1767 info->memory = irecv[3]; 1768 info->mallocs = irecv[4]; 1769 } else if (flag == MAT_GLOBAL_SUM) { 1770 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1771 info->nz_used = irecv[0]; 1772 info->nz_allocated = irecv[1]; 1773 info->nz_unneeded = irecv[2]; 1774 info->memory = irecv[3]; 1775 info->mallocs = irecv[4]; 1776 } 1777 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1778 info->fill_ratio_needed = 0; 1779 info->factor_mallocs = 0; 1780 1781 PetscFunctionReturn(0); 1782 } 1783 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "MatSetOption_MPIAIJ" 1786 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1787 { 1788 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1789 PetscErrorCode ierr; 1790 1791 PetscFunctionBegin; 1792 switch (op) { 1793 case MAT_NEW_NONZERO_LOCATIONS: 1794 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1795 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1796 case MAT_KEEP_NONZERO_PATTERN: 1797 case MAT_NEW_NONZERO_LOCATION_ERR: 1798 case MAT_USE_INODES: 1799 case MAT_IGNORE_ZERO_ENTRIES: 1800 MatCheckPreallocated(A,1); 1801 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1802 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1803 break; 1804 case MAT_ROW_ORIENTED: 1805 a->roworiented = flg; 1806 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1807 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1808 break; 1809 case MAT_NEW_DIAGONALS: 1810 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1811 break; 1812 case MAT_IGNORE_OFF_PROC_ENTRIES: 1813 a->donotstash = flg; 1814 break; 1815 case MAT_SPD: 1816 A->spd_set = PETSC_TRUE; 1817 A->spd = flg; 1818 if (flg) { 1819 A->symmetric = PETSC_TRUE; 1820 A->structurally_symmetric = PETSC_TRUE; 1821 A->symmetric_set = PETSC_TRUE; 1822 A->structurally_symmetric_set = PETSC_TRUE; 1823 } 1824 break; 1825 case MAT_SYMMETRIC: 1826 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1827 break; 1828 case MAT_STRUCTURALLY_SYMMETRIC: 1829 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1830 break; 1831 case MAT_HERMITIAN: 1832 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1833 break; 1834 case MAT_SYMMETRY_ETERNAL: 1835 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1836 break; 1837 default: 1838 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1839 } 1840 PetscFunctionReturn(0); 1841 } 1842 1843 #undef __FUNCT__ 1844 #define __FUNCT__ "MatGetRow_MPIAIJ" 1845 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1846 { 1847 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1848 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1849 PetscErrorCode ierr; 1850 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1851 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1852 PetscInt *cmap,*idx_p; 1853 1854 PetscFunctionBegin; 1855 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1856 mat->getrowactive = PETSC_TRUE; 1857 1858 if (!mat->rowvalues && (idx || v)) { 1859 /* 1860 allocate enough space to hold information from the longest row. 1861 */ 1862 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1863 PetscInt max = 1,tmp; 1864 for (i=0; i<matin->rmap->n; i++) { 1865 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1866 if (max < tmp) { max = tmp; } 1867 } 1868 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1869 } 1870 1871 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1872 lrow = row - rstart; 1873 1874 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1875 if (!v) {pvA = 0; pvB = 0;} 1876 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1877 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1878 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1879 nztot = nzA + nzB; 1880 1881 cmap = mat->garray; 1882 if (v || idx) { 1883 if (nztot) { 1884 /* Sort by increasing column numbers, assuming A and B already sorted */ 1885 PetscInt imark = -1; 1886 if (v) { 1887 *v = v_p = mat->rowvalues; 1888 for (i=0; i<nzB; i++) { 1889 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1890 else break; 1891 } 1892 imark = i; 1893 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1894 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1895 } 1896 if (idx) { 1897 *idx = idx_p = mat->rowindices; 1898 if (imark > -1) { 1899 for (i=0; i<imark; i++) { 1900 idx_p[i] = cmap[cworkB[i]]; 1901 } 1902 } else { 1903 for (i=0; i<nzB; i++) { 1904 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1905 else break; 1906 } 1907 imark = i; 1908 } 1909 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1910 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1911 } 1912 } else { 1913 if (idx) *idx = 0; 1914 if (v) *v = 0; 1915 } 1916 } 1917 *nz = nztot; 1918 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1919 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1920 PetscFunctionReturn(0); 1921 } 1922 1923 #undef __FUNCT__ 1924 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1925 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1926 { 1927 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1928 1929 PetscFunctionBegin; 1930 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1931 aij->getrowactive = PETSC_FALSE; 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatNorm_MPIAIJ" 1937 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1938 { 1939 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1940 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1941 PetscErrorCode ierr; 1942 PetscInt i,j,cstart = mat->cmap->rstart; 1943 PetscReal sum = 0.0; 1944 MatScalar *v; 1945 1946 PetscFunctionBegin; 1947 if (aij->size == 1) { 1948 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1949 } else { 1950 if (type == NORM_FROBENIUS) { 1951 v = amat->a; 1952 for (i=0; i<amat->nz; i++) { 1953 #if defined(PETSC_USE_COMPLEX) 1954 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1955 #else 1956 sum += (*v)*(*v); v++; 1957 #endif 1958 } 1959 v = bmat->a; 1960 for (i=0; i<bmat->nz; i++) { 1961 #if defined(PETSC_USE_COMPLEX) 1962 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1963 #else 1964 sum += (*v)*(*v); v++; 1965 #endif 1966 } 1967 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1968 *norm = PetscSqrtReal(*norm); 1969 } else if (type == NORM_1) { /* max column norm */ 1970 PetscReal *tmp,*tmp2; 1971 PetscInt *jj,*garray = aij->garray; 1972 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1973 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1974 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1975 *norm = 0.0; 1976 v = amat->a; jj = amat->j; 1977 for (j=0; j<amat->nz; j++) { 1978 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1979 } 1980 v = bmat->a; jj = bmat->j; 1981 for (j=0; j<bmat->nz; j++) { 1982 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1983 } 1984 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1985 for (j=0; j<mat->cmap->N; j++) { 1986 if (tmp2[j] > *norm) *norm = tmp2[j]; 1987 } 1988 ierr = PetscFree(tmp);CHKERRQ(ierr); 1989 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1990 } else if (type == NORM_INFINITY) { /* max row norm */ 1991 PetscReal ntemp = 0.0; 1992 for (j=0; j<aij->A->rmap->n; j++) { 1993 v = amat->a + amat->i[j]; 1994 sum = 0.0; 1995 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1996 sum += PetscAbsScalar(*v); v++; 1997 } 1998 v = bmat->a + bmat->i[j]; 1999 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2000 sum += PetscAbsScalar(*v); v++; 2001 } 2002 if (sum > ntemp) ntemp = sum; 2003 } 2004 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2005 } else { 2006 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2007 } 2008 } 2009 PetscFunctionReturn(0); 2010 } 2011 2012 #undef __FUNCT__ 2013 #define __FUNCT__ "MatTranspose_MPIAIJ" 2014 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2015 { 2016 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2017 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2018 PetscErrorCode ierr; 2019 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2020 PetscInt cstart=A->cmap->rstart,ncol; 2021 Mat B; 2022 MatScalar *array; 2023 2024 PetscFunctionBegin; 2025 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2026 2027 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2028 ai = Aloc->i; aj = Aloc->j; 2029 bi = Bloc->i; bj = Bloc->j; 2030 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2031 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2032 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2033 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2034 for (i=0; i<ai[ma]; i++){ 2035 d_nnz[aj[i]] ++; 2036 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2037 } 2038 2039 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2040 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2041 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2042 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2043 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2044 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2045 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2046 } else { 2047 B = *matout; 2048 } 2049 2050 /* copy over the A part */ 2051 array = Aloc->a; 2052 row = A->rmap->rstart; 2053 for (i=0; i<ma; i++) { 2054 ncol = ai[i+1]-ai[i]; 2055 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2056 row++; array += ncol; aj += ncol; 2057 } 2058 aj = Aloc->j; 2059 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2060 2061 /* copy over the B part */ 2062 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2063 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2064 array = Bloc->a; 2065 row = A->rmap->rstart; 2066 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2067 cols_tmp = cols; 2068 for (i=0; i<mb; i++) { 2069 ncol = bi[i+1]-bi[i]; 2070 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2071 row++; array += ncol; cols_tmp += ncol; 2072 } 2073 ierr = PetscFree(cols);CHKERRQ(ierr); 2074 2075 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2076 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2077 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2078 *matout = B; 2079 } else { 2080 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2081 } 2082 PetscFunctionReturn(0); 2083 } 2084 2085 #undef __FUNCT__ 2086 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2088 { 2089 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2090 Mat a = aij->A,b = aij->B; 2091 PetscErrorCode ierr; 2092 PetscInt s1,s2,s3; 2093 2094 PetscFunctionBegin; 2095 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2096 if (rr) { 2097 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2098 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2099 /* Overlap communication with computation. */ 2100 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2101 } 2102 if (ll) { 2103 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2104 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2105 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2106 } 2107 /* scale the diagonal block */ 2108 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2109 2110 if (rr) { 2111 /* Do a scatter end and then right scale the off-diagonal block */ 2112 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2113 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2114 } 2115 2116 PetscFunctionReturn(0); 2117 } 2118 2119 #undef __FUNCT__ 2120 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2121 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2122 { 2123 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2124 PetscErrorCode ierr; 2125 2126 PetscFunctionBegin; 2127 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2128 PetscFunctionReturn(0); 2129 } 2130 2131 #undef __FUNCT__ 2132 #define __FUNCT__ "MatEqual_MPIAIJ" 2133 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2134 { 2135 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2136 Mat a,b,c,d; 2137 PetscBool flg; 2138 PetscErrorCode ierr; 2139 2140 PetscFunctionBegin; 2141 a = matA->A; b = matA->B; 2142 c = matB->A; d = matB->B; 2143 2144 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2145 if (flg) { 2146 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2147 } 2148 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2149 PetscFunctionReturn(0); 2150 } 2151 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "MatCopy_MPIAIJ" 2154 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2155 { 2156 PetscErrorCode ierr; 2157 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2158 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2159 2160 PetscFunctionBegin; 2161 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2162 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2163 /* because of the column compression in the off-processor part of the matrix a->B, 2164 the number of columns in a->B and b->B may be different, hence we cannot call 2165 the MatCopy() directly on the two parts. If need be, we can provide a more 2166 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2167 then copying the submatrices */ 2168 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2169 } else { 2170 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2171 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2172 } 2173 PetscFunctionReturn(0); 2174 } 2175 2176 #undef __FUNCT__ 2177 #define __FUNCT__ "MatSetUp_MPIAIJ" 2178 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2179 { 2180 PetscErrorCode ierr; 2181 2182 PetscFunctionBegin; 2183 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 #undef __FUNCT__ 2188 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2189 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2190 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2191 { 2192 PetscInt i,m=Y->rmap->N; 2193 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2194 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2195 const PetscInt *xi = x->i,*yi = y->i; 2196 2197 PetscFunctionBegin; 2198 /* Set the number of nonzeros in the new matrix */ 2199 for(i=0; i<m; i++) { 2200 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2201 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2202 nnz[i] = 0; 2203 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2204 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2205 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2206 nnz[i]++; 2207 } 2208 for (; k<nzy; k++) nnz[i]++; 2209 } 2210 PetscFunctionReturn(0); 2211 } 2212 2213 #undef __FUNCT__ 2214 #define __FUNCT__ "MatAXPY_MPIAIJ" 2215 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2216 { 2217 PetscErrorCode ierr; 2218 PetscInt i; 2219 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2220 PetscBLASInt bnz,one=1; 2221 Mat_SeqAIJ *x,*y; 2222 2223 PetscFunctionBegin; 2224 if (str == SAME_NONZERO_PATTERN) { 2225 PetscScalar alpha = a; 2226 x = (Mat_SeqAIJ *)xx->A->data; 2227 y = (Mat_SeqAIJ *)yy->A->data; 2228 bnz = PetscBLASIntCast(x->nz); 2229 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2230 x = (Mat_SeqAIJ *)xx->B->data; 2231 y = (Mat_SeqAIJ *)yy->B->data; 2232 bnz = PetscBLASIntCast(x->nz); 2233 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2234 } else if (str == SUBSET_NONZERO_PATTERN) { 2235 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2236 2237 x = (Mat_SeqAIJ *)xx->B->data; 2238 y = (Mat_SeqAIJ *)yy->B->data; 2239 if (y->xtoy && y->XtoY != xx->B) { 2240 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2241 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2242 } 2243 if (!y->xtoy) { /* get xtoy */ 2244 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2245 y->XtoY = xx->B; 2246 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2247 } 2248 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2249 } else { 2250 Mat B; 2251 PetscInt *nnz_d,*nnz_o; 2252 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2253 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2254 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2255 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2256 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2257 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2258 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2259 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2260 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2261 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2262 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2263 ierr = MatHeaderReplace(Y,B); 2264 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2265 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2266 } 2267 PetscFunctionReturn(0); 2268 } 2269 2270 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2271 2272 #undef __FUNCT__ 2273 #define __FUNCT__ "MatConjugate_MPIAIJ" 2274 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2275 { 2276 #if defined(PETSC_USE_COMPLEX) 2277 PetscErrorCode ierr; 2278 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2279 2280 PetscFunctionBegin; 2281 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2282 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2283 #else 2284 PetscFunctionBegin; 2285 #endif 2286 PetscFunctionReturn(0); 2287 } 2288 2289 #undef __FUNCT__ 2290 #define __FUNCT__ "MatRealPart_MPIAIJ" 2291 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2292 { 2293 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2294 PetscErrorCode ierr; 2295 2296 PetscFunctionBegin; 2297 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2298 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2299 PetscFunctionReturn(0); 2300 } 2301 2302 #undef __FUNCT__ 2303 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2304 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2305 { 2306 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2307 PetscErrorCode ierr; 2308 2309 PetscFunctionBegin; 2310 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2311 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2312 PetscFunctionReturn(0); 2313 } 2314 2315 #ifdef PETSC_HAVE_PBGL 2316 2317 #include <boost/parallel/mpi/bsp_process_group.hpp> 2318 #include <boost/graph/distributed/ilu_default_graph.hpp> 2319 #include <boost/graph/distributed/ilu_0_block.hpp> 2320 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2321 #include <boost/graph/distributed/petsc/interface.hpp> 2322 #include <boost/multi_array.hpp> 2323 #include <boost/parallel/distributed_property_map->hpp> 2324 2325 #undef __FUNCT__ 2326 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2327 /* 2328 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2329 */ 2330 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2331 { 2332 namespace petsc = boost::distributed::petsc; 2333 2334 namespace graph_dist = boost::graph::distributed; 2335 using boost::graph::distributed::ilu_default::process_group_type; 2336 using boost::graph::ilu_permuted; 2337 2338 PetscBool row_identity, col_identity; 2339 PetscContainer c; 2340 PetscInt m, n, M, N; 2341 PetscErrorCode ierr; 2342 2343 PetscFunctionBegin; 2344 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2345 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2346 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2347 if (!row_identity || !col_identity) { 2348 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2349 } 2350 2351 process_group_type pg; 2352 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2353 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2354 lgraph_type& level_graph = *lgraph_p; 2355 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2356 2357 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2358 ilu_permuted(level_graph); 2359 2360 /* put together the new matrix */ 2361 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2362 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2363 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2364 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2365 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2366 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2367 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2368 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2369 2370 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2371 ierr = PetscContainerSetPointer(c, lgraph_p); 2372 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2373 ierr = PetscContainerDestroy(&c); 2374 PetscFunctionReturn(0); 2375 } 2376 2377 #undef __FUNCT__ 2378 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2379 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2380 { 2381 PetscFunctionBegin; 2382 PetscFunctionReturn(0); 2383 } 2384 2385 #undef __FUNCT__ 2386 #define __FUNCT__ "MatSolve_MPIAIJ" 2387 /* 2388 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2389 */ 2390 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2391 { 2392 namespace graph_dist = boost::graph::distributed; 2393 2394 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2395 lgraph_type* lgraph_p; 2396 PetscContainer c; 2397 PetscErrorCode ierr; 2398 2399 PetscFunctionBegin; 2400 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2401 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2402 ierr = VecCopy(b, x);CHKERRQ(ierr); 2403 2404 PetscScalar* array_x; 2405 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2406 PetscInt sx; 2407 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2408 2409 PetscScalar* array_b; 2410 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2411 PetscInt sb; 2412 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2413 2414 lgraph_type& level_graph = *lgraph_p; 2415 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2416 2417 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2418 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2419 ref_x(array_x, boost::extents[num_vertices(graph)]); 2420 2421 typedef boost::iterator_property_map<array_ref_type::iterator, 2422 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2423 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2424 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2425 2426 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2427 2428 PetscFunctionReturn(0); 2429 } 2430 #endif 2431 2432 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2433 PetscInt nzlocal,nsends,nrecvs; 2434 PetscMPIInt *send_rank,*recv_rank; 2435 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2436 PetscScalar *sbuf_a,**rbuf_a; 2437 PetscErrorCode (*Destroy)(Mat); 2438 } Mat_Redundant; 2439 2440 #undef __FUNCT__ 2441 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2442 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2443 { 2444 PetscErrorCode ierr; 2445 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2446 PetscInt i; 2447 2448 PetscFunctionBegin; 2449 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2450 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2451 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2452 for (i=0; i<redund->nrecvs; i++){ 2453 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2454 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2455 } 2456 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2457 ierr = PetscFree(redund);CHKERRQ(ierr); 2458 PetscFunctionReturn(0); 2459 } 2460 2461 #undef __FUNCT__ 2462 #define __FUNCT__ "MatDestroy_MatRedundant" 2463 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2464 { 2465 PetscErrorCode ierr; 2466 PetscContainer container; 2467 Mat_Redundant *redund=PETSC_NULL; 2468 2469 PetscFunctionBegin; 2470 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2471 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2472 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2473 A->ops->destroy = redund->Destroy; 2474 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2475 if (A->ops->destroy) { 2476 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2477 } 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2483 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2484 { 2485 PetscMPIInt rank,size; 2486 MPI_Comm comm=((PetscObject)mat)->comm; 2487 PetscErrorCode ierr; 2488 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2489 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2490 PetscInt *rowrange=mat->rmap->range; 2491 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2492 Mat A=aij->A,B=aij->B,C=*matredundant; 2493 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2494 PetscScalar *sbuf_a; 2495 PetscInt nzlocal=a->nz+b->nz; 2496 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2497 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2498 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2499 MatScalar *aworkA,*aworkB; 2500 PetscScalar *vals; 2501 PetscMPIInt tag1,tag2,tag3,imdex; 2502 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2503 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2504 MPI_Status recv_status,*send_status; 2505 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2506 PetscInt **rbuf_j=PETSC_NULL; 2507 PetscScalar **rbuf_a=PETSC_NULL; 2508 Mat_Redundant *redund=PETSC_NULL; 2509 PetscContainer container; 2510 2511 PetscFunctionBegin; 2512 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2513 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2514 2515 if (reuse == MAT_REUSE_MATRIX) { 2516 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2517 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2518 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2519 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2520 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2521 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2522 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2523 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2524 2525 nsends = redund->nsends; 2526 nrecvs = redund->nrecvs; 2527 send_rank = redund->send_rank; 2528 recv_rank = redund->recv_rank; 2529 sbuf_nz = redund->sbuf_nz; 2530 rbuf_nz = redund->rbuf_nz; 2531 sbuf_j = redund->sbuf_j; 2532 sbuf_a = redund->sbuf_a; 2533 rbuf_j = redund->rbuf_j; 2534 rbuf_a = redund->rbuf_a; 2535 } 2536 2537 if (reuse == MAT_INITIAL_MATRIX){ 2538 PetscMPIInt subrank,subsize; 2539 PetscInt nleftover,np_subcomm; 2540 /* get the destination processors' id send_rank, nsends and nrecvs */ 2541 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2542 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2543 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2544 np_subcomm = size/nsubcomm; 2545 nleftover = size - nsubcomm*np_subcomm; 2546 nsends = 0; nrecvs = 0; 2547 for (i=0; i<size; i++){ /* i=rank*/ 2548 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2549 send_rank[nsends] = i; nsends++; 2550 recv_rank[nrecvs++] = i; 2551 } 2552 } 2553 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2554 i = size-nleftover-1; 2555 j = 0; 2556 while (j < nsubcomm - nleftover){ 2557 send_rank[nsends++] = i; 2558 i--; j++; 2559 } 2560 } 2561 2562 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2563 for (i=0; i<nleftover; i++){ 2564 recv_rank[nrecvs++] = size-nleftover+i; 2565 } 2566 } 2567 2568 /* allocate sbuf_j, sbuf_a */ 2569 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2570 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2571 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2572 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2573 2574 /* copy mat's local entries into the buffers */ 2575 if (reuse == MAT_INITIAL_MATRIX){ 2576 rownz_max = 0; 2577 rptr = sbuf_j; 2578 cols = sbuf_j + rend-rstart + 1; 2579 vals = sbuf_a; 2580 rptr[0] = 0; 2581 for (i=0; i<rend-rstart; i++){ 2582 row = i + rstart; 2583 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2584 ncols = nzA + nzB; 2585 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2586 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2587 /* load the column indices for this row into cols */ 2588 lwrite = 0; 2589 for (l=0; l<nzB; l++) { 2590 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2591 vals[lwrite] = aworkB[l]; 2592 cols[lwrite++] = ctmp; 2593 } 2594 } 2595 for (l=0; l<nzA; l++){ 2596 vals[lwrite] = aworkA[l]; 2597 cols[lwrite++] = cstart + cworkA[l]; 2598 } 2599 for (l=0; l<nzB; l++) { 2600 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2601 vals[lwrite] = aworkB[l]; 2602 cols[lwrite++] = ctmp; 2603 } 2604 } 2605 vals += ncols; 2606 cols += ncols; 2607 rptr[i+1] = rptr[i] + ncols; 2608 if (rownz_max < ncols) rownz_max = ncols; 2609 } 2610 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2611 } else { /* only copy matrix values into sbuf_a */ 2612 rptr = sbuf_j; 2613 vals = sbuf_a; 2614 rptr[0] = 0; 2615 for (i=0; i<rend-rstart; i++){ 2616 row = i + rstart; 2617 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2618 ncols = nzA + nzB; 2619 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2620 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2621 lwrite = 0; 2622 for (l=0; l<nzB; l++) { 2623 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2624 } 2625 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2626 for (l=0; l<nzB; l++) { 2627 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2628 } 2629 vals += ncols; 2630 rptr[i+1] = rptr[i] + ncols; 2631 } 2632 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2633 2634 /* send nzlocal to others, and recv other's nzlocal */ 2635 /*--------------------------------------------------*/ 2636 if (reuse == MAT_INITIAL_MATRIX){ 2637 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2638 s_waits2 = s_waits3 + nsends; 2639 s_waits1 = s_waits2 + nsends; 2640 r_waits1 = s_waits1 + nsends; 2641 r_waits2 = r_waits1 + nrecvs; 2642 r_waits3 = r_waits2 + nrecvs; 2643 } else { 2644 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2645 r_waits3 = s_waits3 + nsends; 2646 } 2647 2648 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2649 if (reuse == MAT_INITIAL_MATRIX){ 2650 /* get new tags to keep the communication clean */ 2651 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2652 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2653 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2654 2655 /* post receives of other's nzlocal */ 2656 for (i=0; i<nrecvs; i++){ 2657 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2658 } 2659 /* send nzlocal to others */ 2660 for (i=0; i<nsends; i++){ 2661 sbuf_nz[i] = nzlocal; 2662 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2663 } 2664 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2665 count = nrecvs; 2666 while (count) { 2667 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2668 recv_rank[imdex] = recv_status.MPI_SOURCE; 2669 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2670 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2671 2672 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2673 rbuf_nz[imdex] += i + 2; 2674 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2675 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2676 count--; 2677 } 2678 /* wait on sends of nzlocal */ 2679 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2680 /* send mat->i,j to others, and recv from other's */ 2681 /*------------------------------------------------*/ 2682 for (i=0; i<nsends; i++){ 2683 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2684 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2685 } 2686 /* wait on receives of mat->i,j */ 2687 /*------------------------------*/ 2688 count = nrecvs; 2689 while (count) { 2690 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2691 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2692 count--; 2693 } 2694 /* wait on sends of mat->i,j */ 2695 /*---------------------------*/ 2696 if (nsends) { 2697 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2698 } 2699 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2700 2701 /* post receives, send and receive mat->a */ 2702 /*----------------------------------------*/ 2703 for (imdex=0; imdex<nrecvs; imdex++) { 2704 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2705 } 2706 for (i=0; i<nsends; i++){ 2707 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2708 } 2709 count = nrecvs; 2710 while (count) { 2711 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2712 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2713 count--; 2714 } 2715 if (nsends) { 2716 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2717 } 2718 2719 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2720 2721 /* create redundant matrix */ 2722 /*-------------------------*/ 2723 if (reuse == MAT_INITIAL_MATRIX){ 2724 /* compute rownz_max for preallocation */ 2725 for (imdex=0; imdex<nrecvs; imdex++){ 2726 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2727 rptr = rbuf_j[imdex]; 2728 for (i=0; i<j; i++){ 2729 ncols = rptr[i+1] - rptr[i]; 2730 if (rownz_max < ncols) rownz_max = ncols; 2731 } 2732 } 2733 2734 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2735 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2736 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2737 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2738 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2739 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2740 } else { 2741 C = *matredundant; 2742 } 2743 2744 /* insert local matrix entries */ 2745 rptr = sbuf_j; 2746 cols = sbuf_j + rend-rstart + 1; 2747 vals = sbuf_a; 2748 for (i=0; i<rend-rstart; i++){ 2749 row = i + rstart; 2750 ncols = rptr[i+1] - rptr[i]; 2751 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2752 vals += ncols; 2753 cols += ncols; 2754 } 2755 /* insert received matrix entries */ 2756 for (imdex=0; imdex<nrecvs; imdex++){ 2757 rstart = rowrange[recv_rank[imdex]]; 2758 rend = rowrange[recv_rank[imdex]+1]; 2759 rptr = rbuf_j[imdex]; 2760 cols = rbuf_j[imdex] + rend-rstart + 1; 2761 vals = rbuf_a[imdex]; 2762 for (i=0; i<rend-rstart; i++){ 2763 row = i + rstart; 2764 ncols = rptr[i+1] - rptr[i]; 2765 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2766 vals += ncols; 2767 cols += ncols; 2768 } 2769 } 2770 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2771 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2772 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2773 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2774 if (reuse == MAT_INITIAL_MATRIX) { 2775 PetscContainer container; 2776 *matredundant = C; 2777 /* create a supporting struct and attach it to C for reuse */ 2778 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2779 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2780 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2781 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2782 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2783 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2784 2785 redund->nzlocal = nzlocal; 2786 redund->nsends = nsends; 2787 redund->nrecvs = nrecvs; 2788 redund->send_rank = send_rank; 2789 redund->recv_rank = recv_rank; 2790 redund->sbuf_nz = sbuf_nz; 2791 redund->rbuf_nz = rbuf_nz; 2792 redund->sbuf_j = sbuf_j; 2793 redund->sbuf_a = sbuf_a; 2794 redund->rbuf_j = rbuf_j; 2795 redund->rbuf_a = rbuf_a; 2796 2797 redund->Destroy = C->ops->destroy; 2798 C->ops->destroy = MatDestroy_MatRedundant; 2799 } 2800 PetscFunctionReturn(0); 2801 } 2802 2803 #undef __FUNCT__ 2804 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2805 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2806 { 2807 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2808 PetscErrorCode ierr; 2809 PetscInt i,*idxb = 0; 2810 PetscScalar *va,*vb; 2811 Vec vtmp; 2812 2813 PetscFunctionBegin; 2814 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2815 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2816 if (idx) { 2817 for (i=0; i<A->rmap->n; i++) { 2818 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2819 } 2820 } 2821 2822 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2823 if (idx) { 2824 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2825 } 2826 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2827 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2828 2829 for (i=0; i<A->rmap->n; i++){ 2830 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2831 va[i] = vb[i]; 2832 if (idx) idx[i] = a->garray[idxb[i]]; 2833 } 2834 } 2835 2836 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2837 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2838 ierr = PetscFree(idxb);CHKERRQ(ierr); 2839 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2840 PetscFunctionReturn(0); 2841 } 2842 2843 #undef __FUNCT__ 2844 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2845 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2846 { 2847 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2848 PetscErrorCode ierr; 2849 PetscInt i,*idxb = 0; 2850 PetscScalar *va,*vb; 2851 Vec vtmp; 2852 2853 PetscFunctionBegin; 2854 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2855 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2856 if (idx) { 2857 for (i=0; i<A->cmap->n; i++) { 2858 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2859 } 2860 } 2861 2862 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2863 if (idx) { 2864 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2865 } 2866 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2867 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2868 2869 for (i=0; i<A->rmap->n; i++){ 2870 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2871 va[i] = vb[i]; 2872 if (idx) idx[i] = a->garray[idxb[i]]; 2873 } 2874 } 2875 2876 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2877 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2878 ierr = PetscFree(idxb);CHKERRQ(ierr); 2879 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2880 PetscFunctionReturn(0); 2881 } 2882 2883 #undef __FUNCT__ 2884 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2885 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2886 { 2887 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2888 PetscInt n = A->rmap->n; 2889 PetscInt cstart = A->cmap->rstart; 2890 PetscInt *cmap = mat->garray; 2891 PetscInt *diagIdx, *offdiagIdx; 2892 Vec diagV, offdiagV; 2893 PetscScalar *a, *diagA, *offdiagA; 2894 PetscInt r; 2895 PetscErrorCode ierr; 2896 2897 PetscFunctionBegin; 2898 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2899 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2900 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2901 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2902 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2903 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2904 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2905 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2906 for(r = 0; r < n; ++r) { 2907 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2908 a[r] = diagA[r]; 2909 idx[r] = cstart + diagIdx[r]; 2910 } else { 2911 a[r] = offdiagA[r]; 2912 idx[r] = cmap[offdiagIdx[r]]; 2913 } 2914 } 2915 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2916 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2917 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2918 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2919 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2920 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2921 PetscFunctionReturn(0); 2922 } 2923 2924 #undef __FUNCT__ 2925 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2926 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2927 { 2928 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2929 PetscInt n = A->rmap->n; 2930 PetscInt cstart = A->cmap->rstart; 2931 PetscInt *cmap = mat->garray; 2932 PetscInt *diagIdx, *offdiagIdx; 2933 Vec diagV, offdiagV; 2934 PetscScalar *a, *diagA, *offdiagA; 2935 PetscInt r; 2936 PetscErrorCode ierr; 2937 2938 PetscFunctionBegin; 2939 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2940 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2941 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2942 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2943 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2944 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2945 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2946 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2947 for(r = 0; r < n; ++r) { 2948 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2949 a[r] = diagA[r]; 2950 idx[r] = cstart + diagIdx[r]; 2951 } else { 2952 a[r] = offdiagA[r]; 2953 idx[r] = cmap[offdiagIdx[r]]; 2954 } 2955 } 2956 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2957 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2958 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2959 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2960 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2961 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2962 PetscFunctionReturn(0); 2963 } 2964 2965 #undef __FUNCT__ 2966 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2967 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2968 { 2969 PetscErrorCode ierr; 2970 Mat *dummy; 2971 2972 PetscFunctionBegin; 2973 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2974 *newmat = *dummy; 2975 ierr = PetscFree(dummy);CHKERRQ(ierr); 2976 PetscFunctionReturn(0); 2977 } 2978 2979 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2980 2981 #undef __FUNCT__ 2982 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2983 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2984 { 2985 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2986 PetscErrorCode ierr; 2987 2988 PetscFunctionBegin; 2989 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2990 PetscFunctionReturn(0); 2991 } 2992 2993 2994 /* -------------------------------------------------------------------*/ 2995 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2996 MatGetRow_MPIAIJ, 2997 MatRestoreRow_MPIAIJ, 2998 MatMult_MPIAIJ, 2999 /* 4*/ MatMultAdd_MPIAIJ, 3000 MatMultTranspose_MPIAIJ, 3001 MatMultTransposeAdd_MPIAIJ, 3002 #ifdef PETSC_HAVE_PBGL 3003 MatSolve_MPIAIJ, 3004 #else 3005 0, 3006 #endif 3007 0, 3008 0, 3009 /*10*/ 0, 3010 0, 3011 0, 3012 MatSOR_MPIAIJ, 3013 MatTranspose_MPIAIJ, 3014 /*15*/ MatGetInfo_MPIAIJ, 3015 MatEqual_MPIAIJ, 3016 MatGetDiagonal_MPIAIJ, 3017 MatDiagonalScale_MPIAIJ, 3018 MatNorm_MPIAIJ, 3019 /*20*/ MatAssemblyBegin_MPIAIJ, 3020 MatAssemblyEnd_MPIAIJ, 3021 MatSetOption_MPIAIJ, 3022 MatZeroEntries_MPIAIJ, 3023 /*24*/ MatZeroRows_MPIAIJ, 3024 0, 3025 #ifdef PETSC_HAVE_PBGL 3026 0, 3027 #else 3028 0, 3029 #endif 3030 0, 3031 0, 3032 /*29*/ MatSetUp_MPIAIJ, 3033 #ifdef PETSC_HAVE_PBGL 3034 0, 3035 #else 3036 0, 3037 #endif 3038 0, 3039 0, 3040 0, 3041 /*34*/ MatDuplicate_MPIAIJ, 3042 0, 3043 0, 3044 0, 3045 0, 3046 /*39*/ MatAXPY_MPIAIJ, 3047 MatGetSubMatrices_MPIAIJ, 3048 MatIncreaseOverlap_MPIAIJ, 3049 MatGetValues_MPIAIJ, 3050 MatCopy_MPIAIJ, 3051 /*44*/ MatGetRowMax_MPIAIJ, 3052 MatScale_MPIAIJ, 3053 0, 3054 0, 3055 MatZeroRowsColumns_MPIAIJ, 3056 /*49*/ 0, 3057 0, 3058 0, 3059 0, 3060 0, 3061 /*54*/ MatFDColoringCreate_MPIAIJ, 3062 0, 3063 MatSetUnfactored_MPIAIJ, 3064 0, /* MatPermute_MPIAIJ, impl currently broken */ 3065 0, 3066 /*59*/ MatGetSubMatrix_MPIAIJ, 3067 MatDestroy_MPIAIJ, 3068 MatView_MPIAIJ, 3069 0, 3070 0, 3071 /*64*/ 0, 3072 0, 3073 0, 3074 0, 3075 0, 3076 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3077 MatGetRowMinAbs_MPIAIJ, 3078 0, 3079 MatSetColoring_MPIAIJ, 3080 #if defined(PETSC_HAVE_ADIC) 3081 MatSetValuesAdic_MPIAIJ, 3082 #else 3083 0, 3084 #endif 3085 MatSetValuesAdifor_MPIAIJ, 3086 /*75*/ MatFDColoringApply_AIJ, 3087 0, 3088 0, 3089 0, 3090 0, 3091 /*80*/ 0, 3092 0, 3093 0, 3094 /*83*/ MatLoad_MPIAIJ, 3095 0, 3096 0, 3097 0, 3098 0, 3099 0, 3100 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3101 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3102 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3103 MatPtAP_Basic, 3104 MatPtAPSymbolic_MPIAIJ, 3105 /*94*/ MatPtAPNumeric_MPIAIJ, 3106 0, 3107 0, 3108 0, 3109 0, 3110 /*99*/ 0, 3111 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3112 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3113 MatConjugate_MPIAIJ, 3114 0, 3115 /*104*/MatSetValuesRow_MPIAIJ, 3116 MatRealPart_MPIAIJ, 3117 MatImaginaryPart_MPIAIJ, 3118 0, 3119 0, 3120 /*109*/0, 3121 MatGetRedundantMatrix_MPIAIJ, 3122 MatGetRowMin_MPIAIJ, 3123 0, 3124 0, 3125 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3126 0, 3127 0, 3128 0, 3129 0, 3130 /*119*/0, 3131 0, 3132 0, 3133 0, 3134 MatGetMultiProcBlock_MPIAIJ, 3135 /*124*/MatFindNonZeroRows_MPIAIJ, 3136 MatGetColumnNorms_MPIAIJ, 3137 MatInvertBlockDiagonal_MPIAIJ, 3138 0, 3139 MatGetSubMatricesParallel_MPIAIJ, 3140 /*129*/0, 3141 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3142 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3143 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3144 0, 3145 /*134*/0, 3146 0, 3147 0, 3148 0, 3149 0 3150 }; 3151 3152 /* ----------------------------------------------------------------------------------------*/ 3153 3154 EXTERN_C_BEGIN 3155 #undef __FUNCT__ 3156 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3157 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3158 { 3159 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3160 PetscErrorCode ierr; 3161 3162 PetscFunctionBegin; 3163 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3164 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3165 PetscFunctionReturn(0); 3166 } 3167 EXTERN_C_END 3168 3169 EXTERN_C_BEGIN 3170 #undef __FUNCT__ 3171 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3172 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3173 { 3174 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3175 PetscErrorCode ierr; 3176 3177 PetscFunctionBegin; 3178 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3179 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3180 PetscFunctionReturn(0); 3181 } 3182 EXTERN_C_END 3183 3184 EXTERN_C_BEGIN 3185 #undef __FUNCT__ 3186 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3187 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3188 { 3189 Mat_MPIAIJ *b; 3190 PetscErrorCode ierr; 3191 PetscInt i; 3192 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3193 3194 PetscFunctionBegin; 3195 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3196 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3197 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3198 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3199 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3200 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3201 3202 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3203 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3204 if (d_nnz) { 3205 for (i=0; i<B->rmap->n; i++) { 3206 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3207 } 3208 } 3209 if (o_nnz) { 3210 for (i=0; i<B->rmap->n; i++) { 3211 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3212 } 3213 } 3214 b = (Mat_MPIAIJ*)B->data; 3215 3216 if (!B->preallocated) { 3217 /* Explicitly create 2 MATSEQAIJ matrices. */ 3218 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3219 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3220 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3221 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3222 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3223 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3224 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3225 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3226 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3227 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3228 } 3229 3230 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3231 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3232 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3233 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3234 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3235 B->preallocated = PETSC_TRUE; 3236 PetscFunctionReturn(0); 3237 } 3238 EXTERN_C_END 3239 3240 #undef __FUNCT__ 3241 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3242 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3243 { 3244 Mat mat; 3245 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3246 PetscErrorCode ierr; 3247 3248 PetscFunctionBegin; 3249 *newmat = 0; 3250 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3251 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3252 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3253 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3254 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3255 a = (Mat_MPIAIJ*)mat->data; 3256 3257 mat->factortype = matin->factortype; 3258 mat->rmap->bs = matin->rmap->bs; 3259 mat->cmap->bs = matin->cmap->bs; 3260 mat->assembled = PETSC_TRUE; 3261 mat->insertmode = NOT_SET_VALUES; 3262 mat->preallocated = PETSC_TRUE; 3263 3264 a->size = oldmat->size; 3265 a->rank = oldmat->rank; 3266 a->donotstash = oldmat->donotstash; 3267 a->roworiented = oldmat->roworiented; 3268 a->rowindices = 0; 3269 a->rowvalues = 0; 3270 a->getrowactive = PETSC_FALSE; 3271 3272 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3273 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3274 3275 if (oldmat->colmap) { 3276 #if defined (PETSC_USE_CTABLE) 3277 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3278 #else 3279 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3280 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3281 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3282 #endif 3283 } else a->colmap = 0; 3284 if (oldmat->garray) { 3285 PetscInt len; 3286 len = oldmat->B->cmap->n; 3287 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3288 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3289 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3290 } else a->garray = 0; 3291 3292 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3293 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3294 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3295 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3296 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3297 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3298 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3299 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3300 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3301 *newmat = mat; 3302 PetscFunctionReturn(0); 3303 } 3304 3305 3306 3307 #undef __FUNCT__ 3308 #define __FUNCT__ "MatLoad_MPIAIJ" 3309 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3310 { 3311 PetscScalar *vals,*svals; 3312 MPI_Comm comm = ((PetscObject)viewer)->comm; 3313 PetscErrorCode ierr; 3314 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3315 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3316 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3317 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3318 PetscInt cend,cstart,n,*rowners,sizesset=1; 3319 int fd; 3320 PetscInt bs = 1; 3321 3322 PetscFunctionBegin; 3323 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3324 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3325 if (!rank) { 3326 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3327 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3328 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3329 } 3330 3331 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3332 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3333 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3334 3335 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3336 3337 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3338 M = header[1]; N = header[2]; 3339 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3340 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3341 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3342 3343 /* If global sizes are set, check if they are consistent with that given in the file */ 3344 if (sizesset) { 3345 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3346 } 3347 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3348 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3349 3350 /* determine ownership of all (block) rows */ 3351 if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3352 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3353 else m = newMat->rmap->n; /* Set by user */ 3354 3355 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3356 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3357 3358 /* First process needs enough room for process with most rows */ 3359 if (!rank) { 3360 mmax = rowners[1]; 3361 for (i=2; i<size; i++) { 3362 mmax = PetscMax(mmax,rowners[i]); 3363 } 3364 } else mmax = m; 3365 3366 rowners[0] = 0; 3367 for (i=2; i<=size; i++) { 3368 rowners[i] += rowners[i-1]; 3369 } 3370 rstart = rowners[rank]; 3371 rend = rowners[rank+1]; 3372 3373 /* distribute row lengths to all processors */ 3374 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3375 if (!rank) { 3376 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3377 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3378 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3379 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3380 for (j=0; j<m; j++) { 3381 procsnz[0] += ourlens[j]; 3382 } 3383 for (i=1; i<size; i++) { 3384 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3385 /* calculate the number of nonzeros on each processor */ 3386 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3387 procsnz[i] += rowlengths[j]; 3388 } 3389 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3390 } 3391 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3392 } else { 3393 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3394 } 3395 3396 if (!rank) { 3397 /* determine max buffer needed and allocate it */ 3398 maxnz = 0; 3399 for (i=0; i<size; i++) { 3400 maxnz = PetscMax(maxnz,procsnz[i]); 3401 } 3402 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3403 3404 /* read in my part of the matrix column indices */ 3405 nz = procsnz[0]; 3406 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3407 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3408 3409 /* read in every one elses and ship off */ 3410 for (i=1; i<size; i++) { 3411 nz = procsnz[i]; 3412 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3413 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3414 } 3415 ierr = PetscFree(cols);CHKERRQ(ierr); 3416 } else { 3417 /* determine buffer space needed for message */ 3418 nz = 0; 3419 for (i=0; i<m; i++) { 3420 nz += ourlens[i]; 3421 } 3422 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3423 3424 /* receive message of column indices*/ 3425 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3426 } 3427 3428 /* determine column ownership if matrix is not square */ 3429 if (N != M) { 3430 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3431 else n = newMat->cmap->n; 3432 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3433 cstart = cend - n; 3434 } else { 3435 cstart = rstart; 3436 cend = rend; 3437 n = cend - cstart; 3438 } 3439 3440 /* loop over local rows, determining number of off diagonal entries */ 3441 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3442 jj = 0; 3443 for (i=0; i<m; i++) { 3444 for (j=0; j<ourlens[i]; j++) { 3445 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3446 jj++; 3447 } 3448 } 3449 3450 for (i=0; i<m; i++) { 3451 ourlens[i] -= offlens[i]; 3452 } 3453 if (!sizesset) { 3454 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3455 } 3456 3457 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3458 3459 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3460 3461 for (i=0; i<m; i++) { 3462 ourlens[i] += offlens[i]; 3463 } 3464 3465 if (!rank) { 3466 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3467 3468 /* read in my part of the matrix numerical values */ 3469 nz = procsnz[0]; 3470 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3471 3472 /* insert into matrix */ 3473 jj = rstart; 3474 smycols = mycols; 3475 svals = vals; 3476 for (i=0; i<m; i++) { 3477 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3478 smycols += ourlens[i]; 3479 svals += ourlens[i]; 3480 jj++; 3481 } 3482 3483 /* read in other processors and ship out */ 3484 for (i=1; i<size; i++) { 3485 nz = procsnz[i]; 3486 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3487 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3488 } 3489 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3490 } else { 3491 /* receive numeric values */ 3492 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3493 3494 /* receive message of values*/ 3495 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3496 3497 /* insert into matrix */ 3498 jj = rstart; 3499 smycols = mycols; 3500 svals = vals; 3501 for (i=0; i<m; i++) { 3502 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3503 smycols += ourlens[i]; 3504 svals += ourlens[i]; 3505 jj++; 3506 } 3507 } 3508 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3509 ierr = PetscFree(vals);CHKERRQ(ierr); 3510 ierr = PetscFree(mycols);CHKERRQ(ierr); 3511 ierr = PetscFree(rowners);CHKERRQ(ierr); 3512 3513 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3514 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3515 3516 PetscFunctionReturn(0); 3517 } 3518 3519 #undef __FUNCT__ 3520 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3521 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3522 { 3523 PetscErrorCode ierr; 3524 IS iscol_local; 3525 PetscInt csize; 3526 3527 PetscFunctionBegin; 3528 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3529 if (call == MAT_REUSE_MATRIX) { 3530 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3531 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3532 } else { 3533 PetscInt cbs; 3534 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3535 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3536 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3537 } 3538 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3539 if (call == MAT_INITIAL_MATRIX) { 3540 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3541 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3542 } 3543 PetscFunctionReturn(0); 3544 } 3545 3546 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3547 #undef __FUNCT__ 3548 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3549 /* 3550 Not great since it makes two copies of the submatrix, first an SeqAIJ 3551 in local and then by concatenating the local matrices the end result. 3552 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3553 3554 Note: This requires a sequential iscol with all indices. 3555 */ 3556 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3557 { 3558 PetscErrorCode ierr; 3559 PetscMPIInt rank,size; 3560 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3561 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3562 PetscBool allcolumns, colflag; 3563 Mat M,Mreuse; 3564 MatScalar *vwork,*aa; 3565 MPI_Comm comm = ((PetscObject)mat)->comm; 3566 Mat_SeqAIJ *aij; 3567 3568 3569 PetscFunctionBegin; 3570 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3571 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3572 3573 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3574 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3575 if (colflag && ncol == mat->cmap->N){ 3576 allcolumns = PETSC_TRUE; 3577 } else { 3578 allcolumns = PETSC_FALSE; 3579 } 3580 if (call == MAT_REUSE_MATRIX) { 3581 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3582 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3583 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3584 } else { 3585 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3586 } 3587 3588 /* 3589 m - number of local rows 3590 n - number of columns (same on all processors) 3591 rstart - first row in new global matrix generated 3592 */ 3593 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3594 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3595 if (call == MAT_INITIAL_MATRIX) { 3596 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3597 ii = aij->i; 3598 jj = aij->j; 3599 3600 /* 3601 Determine the number of non-zeros in the diagonal and off-diagonal 3602 portions of the matrix in order to do correct preallocation 3603 */ 3604 3605 /* first get start and end of "diagonal" columns */ 3606 if (csize == PETSC_DECIDE) { 3607 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3608 if (mglobal == n) { /* square matrix */ 3609 nlocal = m; 3610 } else { 3611 nlocal = n/size + ((n % size) > rank); 3612 } 3613 } else { 3614 nlocal = csize; 3615 } 3616 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3617 rstart = rend - nlocal; 3618 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3619 3620 /* next, compute all the lengths */ 3621 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3622 olens = dlens + m; 3623 for (i=0; i<m; i++) { 3624 jend = ii[i+1] - ii[i]; 3625 olen = 0; 3626 dlen = 0; 3627 for (j=0; j<jend; j++) { 3628 if (*jj < rstart || *jj >= rend) olen++; 3629 else dlen++; 3630 jj++; 3631 } 3632 olens[i] = olen; 3633 dlens[i] = dlen; 3634 } 3635 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3636 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3637 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3638 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3639 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3640 ierr = PetscFree(dlens);CHKERRQ(ierr); 3641 } else { 3642 PetscInt ml,nl; 3643 3644 M = *newmat; 3645 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3646 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3647 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3648 /* 3649 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3650 rather than the slower MatSetValues(). 3651 */ 3652 M->was_assembled = PETSC_TRUE; 3653 M->assembled = PETSC_FALSE; 3654 } 3655 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3656 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3657 ii = aij->i; 3658 jj = aij->j; 3659 aa = aij->a; 3660 for (i=0; i<m; i++) { 3661 row = rstart + i; 3662 nz = ii[i+1] - ii[i]; 3663 cwork = jj; jj += nz; 3664 vwork = aa; aa += nz; 3665 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3666 } 3667 3668 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3669 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3670 *newmat = M; 3671 3672 /* save submatrix used in processor for next request */ 3673 if (call == MAT_INITIAL_MATRIX) { 3674 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3675 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3676 } 3677 3678 PetscFunctionReturn(0); 3679 } 3680 3681 EXTERN_C_BEGIN 3682 #undef __FUNCT__ 3683 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3684 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3685 { 3686 PetscInt m,cstart, cend,j,nnz,i,d; 3687 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3688 const PetscInt *JJ; 3689 PetscScalar *values; 3690 PetscErrorCode ierr; 3691 3692 PetscFunctionBegin; 3693 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3694 3695 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3696 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3697 m = B->rmap->n; 3698 cstart = B->cmap->rstart; 3699 cend = B->cmap->rend; 3700 rstart = B->rmap->rstart; 3701 3702 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3703 3704 #if defined(PETSC_USE_DEBUGGING) 3705 for (i=0; i<m; i++) { 3706 nnz = Ii[i+1]- Ii[i]; 3707 JJ = J + Ii[i]; 3708 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3709 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3710 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3711 } 3712 #endif 3713 3714 for (i=0; i<m; i++) { 3715 nnz = Ii[i+1]- Ii[i]; 3716 JJ = J + Ii[i]; 3717 nnz_max = PetscMax(nnz_max,nnz); 3718 d = 0; 3719 for (j=0; j<nnz; j++) { 3720 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3721 } 3722 d_nnz[i] = d; 3723 o_nnz[i] = nnz - d; 3724 } 3725 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3726 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3727 3728 if (v) values = (PetscScalar*)v; 3729 else { 3730 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3731 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3732 } 3733 3734 for (i=0; i<m; i++) { 3735 ii = i + rstart; 3736 nnz = Ii[i+1]- Ii[i]; 3737 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3738 } 3739 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3740 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3741 3742 if (!v) { 3743 ierr = PetscFree(values);CHKERRQ(ierr); 3744 } 3745 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3746 PetscFunctionReturn(0); 3747 } 3748 EXTERN_C_END 3749 3750 #undef __FUNCT__ 3751 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3752 /*@ 3753 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3754 (the default parallel PETSc format). 3755 3756 Collective on MPI_Comm 3757 3758 Input Parameters: 3759 + B - the matrix 3760 . i - the indices into j for the start of each local row (starts with zero) 3761 . j - the column indices for each local row (starts with zero) 3762 - v - optional values in the matrix 3763 3764 Level: developer 3765 3766 Notes: 3767 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3768 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3769 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3770 3771 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3772 3773 The format which is used for the sparse matrix input, is equivalent to a 3774 row-major ordering.. i.e for the following matrix, the input data expected is 3775 as shown: 3776 3777 1 0 0 3778 2 0 3 P0 3779 ------- 3780 4 5 6 P1 3781 3782 Process0 [P0]: rows_owned=[0,1] 3783 i = {0,1,3} [size = nrow+1 = 2+1] 3784 j = {0,0,2} [size = nz = 6] 3785 v = {1,2,3} [size = nz = 6] 3786 3787 Process1 [P1]: rows_owned=[2] 3788 i = {0,3} [size = nrow+1 = 1+1] 3789 j = {0,1,2} [size = nz = 6] 3790 v = {4,5,6} [size = nz = 6] 3791 3792 .keywords: matrix, aij, compressed row, sparse, parallel 3793 3794 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3795 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3796 @*/ 3797 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3798 { 3799 PetscErrorCode ierr; 3800 3801 PetscFunctionBegin; 3802 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3803 PetscFunctionReturn(0); 3804 } 3805 3806 #undef __FUNCT__ 3807 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3808 /*@C 3809 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3810 (the default parallel PETSc format). For good matrix assembly performance 3811 the user should preallocate the matrix storage by setting the parameters 3812 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3813 performance can be increased by more than a factor of 50. 3814 3815 Collective on MPI_Comm 3816 3817 Input Parameters: 3818 + A - the matrix 3819 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3820 (same value is used for all local rows) 3821 . d_nnz - array containing the number of nonzeros in the various rows of the 3822 DIAGONAL portion of the local submatrix (possibly different for each row) 3823 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3824 The size of this array is equal to the number of local rows, i.e 'm'. 3825 For matrices that will be factored, you must leave room for (and set) 3826 the diagonal entry even if it is zero. 3827 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3828 submatrix (same value is used for all local rows). 3829 - o_nnz - array containing the number of nonzeros in the various rows of the 3830 OFF-DIAGONAL portion of the local submatrix (possibly different for 3831 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3832 structure. The size of this array is equal to the number 3833 of local rows, i.e 'm'. 3834 3835 If the *_nnz parameter is given then the *_nz parameter is ignored 3836 3837 The AIJ format (also called the Yale sparse matrix format or 3838 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3839 storage. The stored row and column indices begin with zero. 3840 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3841 3842 The parallel matrix is partitioned such that the first m0 rows belong to 3843 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3844 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3845 3846 The DIAGONAL portion of the local submatrix of a processor can be defined 3847 as the submatrix which is obtained by extraction the part corresponding to 3848 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3849 first row that belongs to the processor, r2 is the last row belonging to 3850 the this processor, and c1-c2 is range of indices of the local part of a 3851 vector suitable for applying the matrix to. This is an mxn matrix. In the 3852 common case of a square matrix, the row and column ranges are the same and 3853 the DIAGONAL part is also square. The remaining portion of the local 3854 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3855 3856 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3857 3858 You can call MatGetInfo() to get information on how effective the preallocation was; 3859 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3860 You can also run with the option -info and look for messages with the string 3861 malloc in them to see if additional memory allocation was needed. 3862 3863 Example usage: 3864 3865 Consider the following 8x8 matrix with 34 non-zero values, that is 3866 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3867 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3868 as follows: 3869 3870 .vb 3871 1 2 0 | 0 3 0 | 0 4 3872 Proc0 0 5 6 | 7 0 0 | 8 0 3873 9 0 10 | 11 0 0 | 12 0 3874 ------------------------------------- 3875 13 0 14 | 15 16 17 | 0 0 3876 Proc1 0 18 0 | 19 20 21 | 0 0 3877 0 0 0 | 22 23 0 | 24 0 3878 ------------------------------------- 3879 Proc2 25 26 27 | 0 0 28 | 29 0 3880 30 0 0 | 31 32 33 | 0 34 3881 .ve 3882 3883 This can be represented as a collection of submatrices as: 3884 3885 .vb 3886 A B C 3887 D E F 3888 G H I 3889 .ve 3890 3891 Where the submatrices A,B,C are owned by proc0, D,E,F are 3892 owned by proc1, G,H,I are owned by proc2. 3893 3894 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3895 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3896 The 'M','N' parameters are 8,8, and have the same values on all procs. 3897 3898 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3899 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3900 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3901 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3902 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3903 matrix, ans [DF] as another SeqAIJ matrix. 3904 3905 When d_nz, o_nz parameters are specified, d_nz storage elements are 3906 allocated for every row of the local diagonal submatrix, and o_nz 3907 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3908 One way to choose d_nz and o_nz is to use the max nonzerors per local 3909 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3910 In this case, the values of d_nz,o_nz are: 3911 .vb 3912 proc0 : dnz = 2, o_nz = 2 3913 proc1 : dnz = 3, o_nz = 2 3914 proc2 : dnz = 1, o_nz = 4 3915 .ve 3916 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3917 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3918 for proc3. i.e we are using 12+15+10=37 storage locations to store 3919 34 values. 3920 3921 When d_nnz, o_nnz parameters are specified, the storage is specified 3922 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3923 In the above case the values for d_nnz,o_nnz are: 3924 .vb 3925 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3926 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3927 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3928 .ve 3929 Here the space allocated is sum of all the above values i.e 34, and 3930 hence pre-allocation is perfect. 3931 3932 Level: intermediate 3933 3934 .keywords: matrix, aij, compressed row, sparse, parallel 3935 3936 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3937 MPIAIJ, MatGetInfo() 3938 @*/ 3939 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3940 { 3941 PetscErrorCode ierr; 3942 3943 PetscFunctionBegin; 3944 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3945 PetscValidType(B,1); 3946 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3947 PetscFunctionReturn(0); 3948 } 3949 3950 #undef __FUNCT__ 3951 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3952 /*@ 3953 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3954 CSR format the local rows. 3955 3956 Collective on MPI_Comm 3957 3958 Input Parameters: 3959 + comm - MPI communicator 3960 . m - number of local rows (Cannot be PETSC_DECIDE) 3961 . n - This value should be the same as the local size used in creating the 3962 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3963 calculated if N is given) For square matrices n is almost always m. 3964 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3965 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3966 . i - row indices 3967 . j - column indices 3968 - a - matrix values 3969 3970 Output Parameter: 3971 . mat - the matrix 3972 3973 Level: intermediate 3974 3975 Notes: 3976 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3977 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3978 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3979 3980 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3981 3982 The format which is used for the sparse matrix input, is equivalent to a 3983 row-major ordering.. i.e for the following matrix, the input data expected is 3984 as shown: 3985 3986 1 0 0 3987 2 0 3 P0 3988 ------- 3989 4 5 6 P1 3990 3991 Process0 [P0]: rows_owned=[0,1] 3992 i = {0,1,3} [size = nrow+1 = 2+1] 3993 j = {0,0,2} [size = nz = 6] 3994 v = {1,2,3} [size = nz = 6] 3995 3996 Process1 [P1]: rows_owned=[2] 3997 i = {0,3} [size = nrow+1 = 1+1] 3998 j = {0,1,2} [size = nz = 6] 3999 v = {4,5,6} [size = nz = 6] 4000 4001 .keywords: matrix, aij, compressed row, sparse, parallel 4002 4003 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4004 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4005 @*/ 4006 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4007 { 4008 PetscErrorCode ierr; 4009 4010 PetscFunctionBegin; 4011 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4012 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4013 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4014 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4015 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 4016 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4017 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4018 PetscFunctionReturn(0); 4019 } 4020 4021 #undef __FUNCT__ 4022 #define __FUNCT__ "MatCreateAIJ" 4023 /*@C 4024 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4025 (the default parallel PETSc format). For good matrix assembly performance 4026 the user should preallocate the matrix storage by setting the parameters 4027 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4028 performance can be increased by more than a factor of 50. 4029 4030 Collective on MPI_Comm 4031 4032 Input Parameters: 4033 + comm - MPI communicator 4034 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4035 This value should be the same as the local size used in creating the 4036 y vector for the matrix-vector product y = Ax. 4037 . n - This value should be the same as the local size used in creating the 4038 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4039 calculated if N is given) For square matrices n is almost always m. 4040 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4041 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4042 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4043 (same value is used for all local rows) 4044 . d_nnz - array containing the number of nonzeros in the various rows of the 4045 DIAGONAL portion of the local submatrix (possibly different for each row) 4046 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4047 The size of this array is equal to the number of local rows, i.e 'm'. 4048 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4049 submatrix (same value is used for all local rows). 4050 - o_nnz - array containing the number of nonzeros in the various rows of the 4051 OFF-DIAGONAL portion of the local submatrix (possibly different for 4052 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4053 structure. The size of this array is equal to the number 4054 of local rows, i.e 'm'. 4055 4056 Output Parameter: 4057 . A - the matrix 4058 4059 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4060 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4061 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4062 4063 Notes: 4064 If the *_nnz parameter is given then the *_nz parameter is ignored 4065 4066 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4067 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4068 storage requirements for this matrix. 4069 4070 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4071 processor than it must be used on all processors that share the object for 4072 that argument. 4073 4074 The user MUST specify either the local or global matrix dimensions 4075 (possibly both). 4076 4077 The parallel matrix is partitioned across processors such that the 4078 first m0 rows belong to process 0, the next m1 rows belong to 4079 process 1, the next m2 rows belong to process 2 etc.. where 4080 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4081 values corresponding to [m x N] submatrix. 4082 4083 The columns are logically partitioned with the n0 columns belonging 4084 to 0th partition, the next n1 columns belonging to the next 4085 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4086 4087 The DIAGONAL portion of the local submatrix on any given processor 4088 is the submatrix corresponding to the rows and columns m,n 4089 corresponding to the given processor. i.e diagonal matrix on 4090 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4091 etc. The remaining portion of the local submatrix [m x (N-n)] 4092 constitute the OFF-DIAGONAL portion. The example below better 4093 illustrates this concept. 4094 4095 For a square global matrix we define each processor's diagonal portion 4096 to be its local rows and the corresponding columns (a square submatrix); 4097 each processor's off-diagonal portion encompasses the remainder of the 4098 local matrix (a rectangular submatrix). 4099 4100 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4101 4102 When calling this routine with a single process communicator, a matrix of 4103 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4104 type of communicator, use the construction mechanism: 4105 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4106 4107 By default, this format uses inodes (identical nodes) when possible. 4108 We search for consecutive rows with the same nonzero structure, thereby 4109 reusing matrix information to achieve increased efficiency. 4110 4111 Options Database Keys: 4112 + -mat_no_inode - Do not use inodes 4113 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4114 - -mat_aij_oneindex - Internally use indexing starting at 1 4115 rather than 0. Note that when calling MatSetValues(), 4116 the user still MUST index entries starting at 0! 4117 4118 4119 Example usage: 4120 4121 Consider the following 8x8 matrix with 34 non-zero values, that is 4122 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4123 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4124 as follows: 4125 4126 .vb 4127 1 2 0 | 0 3 0 | 0 4 4128 Proc0 0 5 6 | 7 0 0 | 8 0 4129 9 0 10 | 11 0 0 | 12 0 4130 ------------------------------------- 4131 13 0 14 | 15 16 17 | 0 0 4132 Proc1 0 18 0 | 19 20 21 | 0 0 4133 0 0 0 | 22 23 0 | 24 0 4134 ------------------------------------- 4135 Proc2 25 26 27 | 0 0 28 | 29 0 4136 30 0 0 | 31 32 33 | 0 34 4137 .ve 4138 4139 This can be represented as a collection of submatrices as: 4140 4141 .vb 4142 A B C 4143 D E F 4144 G H I 4145 .ve 4146 4147 Where the submatrices A,B,C are owned by proc0, D,E,F are 4148 owned by proc1, G,H,I are owned by proc2. 4149 4150 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4151 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4152 The 'M','N' parameters are 8,8, and have the same values on all procs. 4153 4154 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4155 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4156 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4157 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4158 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4159 matrix, ans [DF] as another SeqAIJ matrix. 4160 4161 When d_nz, o_nz parameters are specified, d_nz storage elements are 4162 allocated for every row of the local diagonal submatrix, and o_nz 4163 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4164 One way to choose d_nz and o_nz is to use the max nonzerors per local 4165 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4166 In this case, the values of d_nz,o_nz are: 4167 .vb 4168 proc0 : dnz = 2, o_nz = 2 4169 proc1 : dnz = 3, o_nz = 2 4170 proc2 : dnz = 1, o_nz = 4 4171 .ve 4172 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4173 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4174 for proc3. i.e we are using 12+15+10=37 storage locations to store 4175 34 values. 4176 4177 When d_nnz, o_nnz parameters are specified, the storage is specified 4178 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4179 In the above case the values for d_nnz,o_nnz are: 4180 .vb 4181 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4182 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4183 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4184 .ve 4185 Here the space allocated is sum of all the above values i.e 34, and 4186 hence pre-allocation is perfect. 4187 4188 Level: intermediate 4189 4190 .keywords: matrix, aij, compressed row, sparse, parallel 4191 4192 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4193 MPIAIJ, MatCreateMPIAIJWithArrays() 4194 @*/ 4195 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4196 { 4197 PetscErrorCode ierr; 4198 PetscMPIInt size; 4199 4200 PetscFunctionBegin; 4201 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4202 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4203 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4204 if (size > 1) { 4205 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4206 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4207 } else { 4208 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4209 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4210 } 4211 PetscFunctionReturn(0); 4212 } 4213 4214 #undef __FUNCT__ 4215 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4216 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4217 { 4218 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4219 4220 PetscFunctionBegin; 4221 *Ad = a->A; 4222 *Ao = a->B; 4223 *colmap = a->garray; 4224 PetscFunctionReturn(0); 4225 } 4226 4227 #undef __FUNCT__ 4228 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4229 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4230 { 4231 PetscErrorCode ierr; 4232 PetscInt i; 4233 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4234 4235 PetscFunctionBegin; 4236 if (coloring->ctype == IS_COLORING_GLOBAL) { 4237 ISColoringValue *allcolors,*colors; 4238 ISColoring ocoloring; 4239 4240 /* set coloring for diagonal portion */ 4241 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4242 4243 /* set coloring for off-diagonal portion */ 4244 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4245 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4246 for (i=0; i<a->B->cmap->n; i++) { 4247 colors[i] = allcolors[a->garray[i]]; 4248 } 4249 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4250 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4251 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4252 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4253 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4254 ISColoringValue *colors; 4255 PetscInt *larray; 4256 ISColoring ocoloring; 4257 4258 /* set coloring for diagonal portion */ 4259 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4260 for (i=0; i<a->A->cmap->n; i++) { 4261 larray[i] = i + A->cmap->rstart; 4262 } 4263 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4264 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4265 for (i=0; i<a->A->cmap->n; i++) { 4266 colors[i] = coloring->colors[larray[i]]; 4267 } 4268 ierr = PetscFree(larray);CHKERRQ(ierr); 4269 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4270 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4271 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4272 4273 /* set coloring for off-diagonal portion */ 4274 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4275 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4276 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4277 for (i=0; i<a->B->cmap->n; i++) { 4278 colors[i] = coloring->colors[larray[i]]; 4279 } 4280 ierr = PetscFree(larray);CHKERRQ(ierr); 4281 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4282 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4283 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4284 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4285 4286 PetscFunctionReturn(0); 4287 } 4288 4289 #if defined(PETSC_HAVE_ADIC) 4290 #undef __FUNCT__ 4291 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4292 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4293 { 4294 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4295 PetscErrorCode ierr; 4296 4297 PetscFunctionBegin; 4298 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4299 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4300 PetscFunctionReturn(0); 4301 } 4302 #endif 4303 4304 #undef __FUNCT__ 4305 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4306 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4307 { 4308 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4309 PetscErrorCode ierr; 4310 4311 PetscFunctionBegin; 4312 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4313 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4314 PetscFunctionReturn(0); 4315 } 4316 4317 #undef __FUNCT__ 4318 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4319 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4320 { 4321 PetscErrorCode ierr; 4322 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4323 PetscInt *indx; 4324 4325 PetscFunctionBegin; 4326 /* This routine will ONLY return MPIAIJ type matrix */ 4327 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4328 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4329 if (n == PETSC_DECIDE){ 4330 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4331 } 4332 /* Check sum(n) = N */ 4333 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4334 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4335 4336 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4337 rstart -= m; 4338 4339 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4340 for (i=0;i<m;i++) { 4341 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4342 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4343 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4344 } 4345 4346 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4347 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4348 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4349 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4350 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4351 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4352 PetscFunctionReturn(0); 4353 } 4354 4355 #undef __FUNCT__ 4356 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4357 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4358 { 4359 PetscErrorCode ierr; 4360 PetscInt m,N,i,rstart,nnz,Ii; 4361 PetscInt *indx; 4362 PetscScalar *values; 4363 4364 PetscFunctionBegin; 4365 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4366 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4367 for (i=0;i<m;i++) { 4368 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4369 Ii = i + rstart; 4370 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4371 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4372 } 4373 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4374 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4375 PetscFunctionReturn(0); 4376 } 4377 4378 #undef __FUNCT__ 4379 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4380 /*@ 4381 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4382 matrices from each processor 4383 4384 Collective on MPI_Comm 4385 4386 Input Parameters: 4387 + comm - the communicators the parallel matrix will live on 4388 . inmat - the input sequential matrices 4389 . n - number of local columns (or PETSC_DECIDE) 4390 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4391 4392 Output Parameter: 4393 . outmat - the parallel matrix generated 4394 4395 Level: advanced 4396 4397 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4398 4399 @*/ 4400 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4401 { 4402 PetscErrorCode ierr; 4403 4404 PetscFunctionBegin; 4405 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4406 if (scall == MAT_INITIAL_MATRIX){ 4407 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4408 } 4409 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4410 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4411 PetscFunctionReturn(0); 4412 } 4413 4414 #undef __FUNCT__ 4415 #define __FUNCT__ "MatFileSplit" 4416 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4417 { 4418 PetscErrorCode ierr; 4419 PetscMPIInt rank; 4420 PetscInt m,N,i,rstart,nnz; 4421 size_t len; 4422 const PetscInt *indx; 4423 PetscViewer out; 4424 char *name; 4425 Mat B; 4426 const PetscScalar *values; 4427 4428 PetscFunctionBegin; 4429 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4430 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4431 /* Should this be the type of the diagonal block of A? */ 4432 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4433 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4434 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4435 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4436 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4437 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4438 for (i=0;i<m;i++) { 4439 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4440 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4441 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4442 } 4443 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4444 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4445 4446 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4447 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4448 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4449 sprintf(name,"%s.%d",outfile,rank); 4450 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4451 ierr = PetscFree(name); 4452 ierr = MatView(B,out);CHKERRQ(ierr); 4453 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4454 ierr = MatDestroy(&B);CHKERRQ(ierr); 4455 PetscFunctionReturn(0); 4456 } 4457 4458 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4459 #undef __FUNCT__ 4460 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4461 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4462 { 4463 PetscErrorCode ierr; 4464 Mat_Merge_SeqsToMPI *merge; 4465 PetscContainer container; 4466 4467 PetscFunctionBegin; 4468 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4469 if (container) { 4470 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4471 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4472 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4473 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4474 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4475 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4476 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4477 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4478 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4479 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4480 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4481 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4482 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4483 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4484 ierr = PetscFree(merge);CHKERRQ(ierr); 4485 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4486 } 4487 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4488 PetscFunctionReturn(0); 4489 } 4490 4491 #include <../src/mat/utils/freespace.h> 4492 #include <petscbt.h> 4493 4494 #undef __FUNCT__ 4495 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4496 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4497 { 4498 PetscErrorCode ierr; 4499 MPI_Comm comm=((PetscObject)mpimat)->comm; 4500 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4501 PetscMPIInt size,rank,taga,*len_s; 4502 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4503 PetscInt proc,m; 4504 PetscInt **buf_ri,**buf_rj; 4505 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4506 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4507 MPI_Request *s_waits,*r_waits; 4508 MPI_Status *status; 4509 MatScalar *aa=a->a; 4510 MatScalar **abuf_r,*ba_i; 4511 Mat_Merge_SeqsToMPI *merge; 4512 PetscContainer container; 4513 4514 PetscFunctionBegin; 4515 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4516 4517 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4518 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4519 4520 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4521 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4522 4523 bi = merge->bi; 4524 bj = merge->bj; 4525 buf_ri = merge->buf_ri; 4526 buf_rj = merge->buf_rj; 4527 4528 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4529 owners = merge->rowmap->range; 4530 len_s = merge->len_s; 4531 4532 /* send and recv matrix values */ 4533 /*-----------------------------*/ 4534 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4535 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4536 4537 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4538 for (proc=0,k=0; proc<size; proc++){ 4539 if (!len_s[proc]) continue; 4540 i = owners[proc]; 4541 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4542 k++; 4543 } 4544 4545 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4546 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4547 ierr = PetscFree(status);CHKERRQ(ierr); 4548 4549 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4550 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4551 4552 /* insert mat values of mpimat */ 4553 /*----------------------------*/ 4554 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4555 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4556 4557 for (k=0; k<merge->nrecv; k++){ 4558 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4559 nrows = *(buf_ri_k[k]); 4560 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4561 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4562 } 4563 4564 /* set values of ba */ 4565 m = merge->rowmap->n; 4566 for (i=0; i<m; i++) { 4567 arow = owners[rank] + i; 4568 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4569 bnzi = bi[i+1] - bi[i]; 4570 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4571 4572 /* add local non-zero vals of this proc's seqmat into ba */ 4573 anzi = ai[arow+1] - ai[arow]; 4574 aj = a->j + ai[arow]; 4575 aa = a->a + ai[arow]; 4576 nextaj = 0; 4577 for (j=0; nextaj<anzi; j++){ 4578 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4579 ba_i[j] += aa[nextaj++]; 4580 } 4581 } 4582 4583 /* add received vals into ba */ 4584 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4585 /* i-th row */ 4586 if (i == *nextrow[k]) { 4587 anzi = *(nextai[k]+1) - *nextai[k]; 4588 aj = buf_rj[k] + *(nextai[k]); 4589 aa = abuf_r[k] + *(nextai[k]); 4590 nextaj = 0; 4591 for (j=0; nextaj<anzi; j++){ 4592 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4593 ba_i[j] += aa[nextaj++]; 4594 } 4595 } 4596 nextrow[k]++; nextai[k]++; 4597 } 4598 } 4599 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4600 } 4601 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4602 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4603 4604 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4605 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4606 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4607 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4608 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4609 PetscFunctionReturn(0); 4610 } 4611 4612 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4613 4614 #undef __FUNCT__ 4615 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4616 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4617 { 4618 PetscErrorCode ierr; 4619 Mat B_mpi; 4620 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4621 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4622 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4623 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4624 PetscInt len,proc,*dnz,*onz,bs,cbs; 4625 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4626 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4627 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4628 MPI_Status *status; 4629 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4630 PetscBT lnkbt; 4631 Mat_Merge_SeqsToMPI *merge; 4632 PetscContainer container; 4633 4634 PetscFunctionBegin; 4635 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4636 4637 /* make sure it is a PETSc comm */ 4638 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4639 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4640 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4641 4642 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4643 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4644 4645 /* determine row ownership */ 4646 /*---------------------------------------------------------*/ 4647 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4648 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4649 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4650 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4651 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4652 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4653 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4654 4655 m = merge->rowmap->n; 4656 M = merge->rowmap->N; 4657 owners = merge->rowmap->range; 4658 4659 /* determine the number of messages to send, their lengths */ 4660 /*---------------------------------------------------------*/ 4661 len_s = merge->len_s; 4662 4663 len = 0; /* length of buf_si[] */ 4664 merge->nsend = 0; 4665 for (proc=0; proc<size; proc++){ 4666 len_si[proc] = 0; 4667 if (proc == rank){ 4668 len_s[proc] = 0; 4669 } else { 4670 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4671 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4672 } 4673 if (len_s[proc]) { 4674 merge->nsend++; 4675 nrows = 0; 4676 for (i=owners[proc]; i<owners[proc+1]; i++){ 4677 if (ai[i+1] > ai[i]) nrows++; 4678 } 4679 len_si[proc] = 2*(nrows+1); 4680 len += len_si[proc]; 4681 } 4682 } 4683 4684 /* determine the number and length of messages to receive for ij-structure */ 4685 /*-------------------------------------------------------------------------*/ 4686 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4687 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4688 4689 /* post the Irecv of j-structure */ 4690 /*-------------------------------*/ 4691 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4692 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4693 4694 /* post the Isend of j-structure */ 4695 /*--------------------------------*/ 4696 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4697 4698 for (proc=0, k=0; proc<size; proc++){ 4699 if (!len_s[proc]) continue; 4700 i = owners[proc]; 4701 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4702 k++; 4703 } 4704 4705 /* receives and sends of j-structure are complete */ 4706 /*------------------------------------------------*/ 4707 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4708 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4709 4710 /* send and recv i-structure */ 4711 /*---------------------------*/ 4712 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4713 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4714 4715 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4716 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4717 for (proc=0,k=0; proc<size; proc++){ 4718 if (!len_s[proc]) continue; 4719 /* form outgoing message for i-structure: 4720 buf_si[0]: nrows to be sent 4721 [1:nrows]: row index (global) 4722 [nrows+1:2*nrows+1]: i-structure index 4723 */ 4724 /*-------------------------------------------*/ 4725 nrows = len_si[proc]/2 - 1; 4726 buf_si_i = buf_si + nrows+1; 4727 buf_si[0] = nrows; 4728 buf_si_i[0] = 0; 4729 nrows = 0; 4730 for (i=owners[proc]; i<owners[proc+1]; i++){ 4731 anzi = ai[i+1] - ai[i]; 4732 if (anzi) { 4733 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4734 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4735 nrows++; 4736 } 4737 } 4738 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4739 k++; 4740 buf_si += len_si[proc]; 4741 } 4742 4743 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4744 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4745 4746 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4747 for (i=0; i<merge->nrecv; i++){ 4748 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4749 } 4750 4751 ierr = PetscFree(len_si);CHKERRQ(ierr); 4752 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4753 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4754 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4755 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4756 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4757 ierr = PetscFree(status);CHKERRQ(ierr); 4758 4759 /* compute a local seq matrix in each processor */ 4760 /*----------------------------------------------*/ 4761 /* allocate bi array and free space for accumulating nonzero column info */ 4762 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4763 bi[0] = 0; 4764 4765 /* create and initialize a linked list */ 4766 nlnk = N+1; 4767 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4768 4769 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4770 len = 0; 4771 len = ai[owners[rank+1]] - ai[owners[rank]]; 4772 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4773 current_space = free_space; 4774 4775 /* determine symbolic info for each local row */ 4776 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4777 4778 for (k=0; k<merge->nrecv; k++){ 4779 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4780 nrows = *buf_ri_k[k]; 4781 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4782 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4783 } 4784 4785 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4786 len = 0; 4787 for (i=0;i<m;i++) { 4788 bnzi = 0; 4789 /* add local non-zero cols of this proc's seqmat into lnk */ 4790 arow = owners[rank] + i; 4791 anzi = ai[arow+1] - ai[arow]; 4792 aj = a->j + ai[arow]; 4793 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4794 bnzi += nlnk; 4795 /* add received col data into lnk */ 4796 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4797 if (i == *nextrow[k]) { /* i-th row */ 4798 anzi = *(nextai[k]+1) - *nextai[k]; 4799 aj = buf_rj[k] + *nextai[k]; 4800 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4801 bnzi += nlnk; 4802 nextrow[k]++; nextai[k]++; 4803 } 4804 } 4805 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4806 4807 /* if free space is not available, make more free space */ 4808 if (current_space->local_remaining<bnzi) { 4809 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4810 nspacedouble++; 4811 } 4812 /* copy data into free space, then initialize lnk */ 4813 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4814 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4815 4816 current_space->array += bnzi; 4817 current_space->local_used += bnzi; 4818 current_space->local_remaining -= bnzi; 4819 4820 bi[i+1] = bi[i] + bnzi; 4821 } 4822 4823 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4824 4825 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4826 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4827 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4828 4829 /* create symbolic parallel matrix B_mpi */ 4830 /*---------------------------------------*/ 4831 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4832 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4833 if (n==PETSC_DECIDE) { 4834 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4835 } else { 4836 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4837 } 4838 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr); 4839 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4840 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4841 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4842 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4843 4844 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4845 B_mpi->assembled = PETSC_FALSE; 4846 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4847 merge->bi = bi; 4848 merge->bj = bj; 4849 merge->buf_ri = buf_ri; 4850 merge->buf_rj = buf_rj; 4851 merge->coi = PETSC_NULL; 4852 merge->coj = PETSC_NULL; 4853 merge->owners_co = PETSC_NULL; 4854 4855 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4856 4857 /* attach the supporting struct to B_mpi for reuse */ 4858 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4859 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4860 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4861 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4862 *mpimat = B_mpi; 4863 4864 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4865 PetscFunctionReturn(0); 4866 } 4867 4868 #undef __FUNCT__ 4869 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4870 /*@C 4871 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4872 matrices from each processor 4873 4874 Collective on MPI_Comm 4875 4876 Input Parameters: 4877 + comm - the communicators the parallel matrix will live on 4878 . seqmat - the input sequential matrices 4879 . m - number of local rows (or PETSC_DECIDE) 4880 . n - number of local columns (or PETSC_DECIDE) 4881 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4882 4883 Output Parameter: 4884 . mpimat - the parallel matrix generated 4885 4886 Level: advanced 4887 4888 Notes: 4889 The dimensions of the sequential matrix in each processor MUST be the same. 4890 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4891 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4892 @*/ 4893 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4894 { 4895 PetscErrorCode ierr; 4896 PetscMPIInt size; 4897 4898 PetscFunctionBegin; 4899 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4900 if (size == 1){ 4901 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4902 if (scall == MAT_INITIAL_MATRIX){ 4903 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4904 } else { 4905 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4906 } 4907 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4908 PetscFunctionReturn(0); 4909 } 4910 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4911 if (scall == MAT_INITIAL_MATRIX){ 4912 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4913 } 4914 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4915 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4916 PetscFunctionReturn(0); 4917 } 4918 4919 #undef __FUNCT__ 4920 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4921 /*@ 4922 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4923 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4924 with MatGetSize() 4925 4926 Not Collective 4927 4928 Input Parameters: 4929 + A - the matrix 4930 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4931 4932 Output Parameter: 4933 . A_loc - the local sequential matrix generated 4934 4935 Level: developer 4936 4937 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4938 4939 @*/ 4940 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4941 { 4942 PetscErrorCode ierr; 4943 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4944 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4945 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4946 MatScalar *aa=a->a,*ba=b->a,*cam; 4947 PetscScalar *ca; 4948 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4949 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4950 PetscBool match; 4951 4952 PetscFunctionBegin; 4953 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4954 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4955 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4956 if (scall == MAT_INITIAL_MATRIX){ 4957 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4958 ci[0] = 0; 4959 for (i=0; i<am; i++){ 4960 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4961 } 4962 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4963 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4964 k = 0; 4965 for (i=0; i<am; i++) { 4966 ncols_o = bi[i+1] - bi[i]; 4967 ncols_d = ai[i+1] - ai[i]; 4968 /* off-diagonal portion of A */ 4969 for (jo=0; jo<ncols_o; jo++) { 4970 col = cmap[*bj]; 4971 if (col >= cstart) break; 4972 cj[k] = col; bj++; 4973 ca[k++] = *ba++; 4974 } 4975 /* diagonal portion of A */ 4976 for (j=0; j<ncols_d; j++) { 4977 cj[k] = cstart + *aj++; 4978 ca[k++] = *aa++; 4979 } 4980 /* off-diagonal portion of A */ 4981 for (j=jo; j<ncols_o; j++) { 4982 cj[k] = cmap[*bj++]; 4983 ca[k++] = *ba++; 4984 } 4985 } 4986 /* put together the new matrix */ 4987 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4988 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4989 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4990 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4991 mat->free_a = PETSC_TRUE; 4992 mat->free_ij = PETSC_TRUE; 4993 mat->nonew = 0; 4994 } else if (scall == MAT_REUSE_MATRIX){ 4995 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4996 ci = mat->i; cj = mat->j; cam = mat->a; 4997 for (i=0; i<am; i++) { 4998 /* off-diagonal portion of A */ 4999 ncols_o = bi[i+1] - bi[i]; 5000 for (jo=0; jo<ncols_o; jo++) { 5001 col = cmap[*bj]; 5002 if (col >= cstart) break; 5003 *cam++ = *ba++; bj++; 5004 } 5005 /* diagonal portion of A */ 5006 ncols_d = ai[i+1] - ai[i]; 5007 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5008 /* off-diagonal portion of A */ 5009 for (j=jo; j<ncols_o; j++) { 5010 *cam++ = *ba++; bj++; 5011 } 5012 } 5013 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5014 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5015 PetscFunctionReturn(0); 5016 } 5017 5018 #undef __FUNCT__ 5019 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5020 /*@C 5021 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5022 5023 Not Collective 5024 5025 Input Parameters: 5026 + A - the matrix 5027 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5028 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5029 5030 Output Parameter: 5031 . A_loc - the local sequential matrix generated 5032 5033 Level: developer 5034 5035 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5036 5037 @*/ 5038 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5039 { 5040 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5041 PetscErrorCode ierr; 5042 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5043 IS isrowa,iscola; 5044 Mat *aloc; 5045 PetscBool match; 5046 5047 PetscFunctionBegin; 5048 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5049 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5050 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5051 if (!row){ 5052 start = A->rmap->rstart; end = A->rmap->rend; 5053 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5054 } else { 5055 isrowa = *row; 5056 } 5057 if (!col){ 5058 start = A->cmap->rstart; 5059 cmap = a->garray; 5060 nzA = a->A->cmap->n; 5061 nzB = a->B->cmap->n; 5062 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5063 ncols = 0; 5064 for (i=0; i<nzB; i++) { 5065 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5066 else break; 5067 } 5068 imark = i; 5069 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5070 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5071 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5072 } else { 5073 iscola = *col; 5074 } 5075 if (scall != MAT_INITIAL_MATRIX){ 5076 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5077 aloc[0] = *A_loc; 5078 } 5079 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5080 *A_loc = aloc[0]; 5081 ierr = PetscFree(aloc);CHKERRQ(ierr); 5082 if (!row){ 5083 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5084 } 5085 if (!col){ 5086 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5087 } 5088 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5089 PetscFunctionReturn(0); 5090 } 5091 5092 #undef __FUNCT__ 5093 #define __FUNCT__ "MatGetBrowsOfAcols" 5094 /*@C 5095 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5096 5097 Collective on Mat 5098 5099 Input Parameters: 5100 + A,B - the matrices in mpiaij format 5101 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5102 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5103 5104 Output Parameter: 5105 + rowb, colb - index sets of rows and columns of B to extract 5106 - B_seq - the sequential matrix generated 5107 5108 Level: developer 5109 5110 @*/ 5111 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5112 { 5113 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5114 PetscErrorCode ierr; 5115 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5116 IS isrowb,iscolb; 5117 Mat *bseq=PETSC_NULL; 5118 5119 PetscFunctionBegin; 5120 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5121 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5122 } 5123 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5124 5125 if (scall == MAT_INITIAL_MATRIX){ 5126 start = A->cmap->rstart; 5127 cmap = a->garray; 5128 nzA = a->A->cmap->n; 5129 nzB = a->B->cmap->n; 5130 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5131 ncols = 0; 5132 for (i=0; i<nzB; i++) { /* row < local row index */ 5133 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5134 else break; 5135 } 5136 imark = i; 5137 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5138 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5139 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5140 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5141 } else { 5142 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5143 isrowb = *rowb; iscolb = *colb; 5144 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5145 bseq[0] = *B_seq; 5146 } 5147 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5148 *B_seq = bseq[0]; 5149 ierr = PetscFree(bseq);CHKERRQ(ierr); 5150 if (!rowb){ 5151 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5152 } else { 5153 *rowb = isrowb; 5154 } 5155 if (!colb){ 5156 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5157 } else { 5158 *colb = iscolb; 5159 } 5160 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5161 PetscFunctionReturn(0); 5162 } 5163 5164 #undef __FUNCT__ 5165 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5166 /* 5167 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5168 of the OFF-DIAGONAL portion of local A 5169 5170 Collective on Mat 5171 5172 Input Parameters: 5173 + A,B - the matrices in mpiaij format 5174 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5175 5176 Output Parameter: 5177 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5178 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5179 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5180 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5181 5182 Level: developer 5183 5184 */ 5185 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5186 { 5187 VecScatter_MPI_General *gen_to,*gen_from; 5188 PetscErrorCode ierr; 5189 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5190 Mat_SeqAIJ *b_oth; 5191 VecScatter ctx=a->Mvctx; 5192 MPI_Comm comm=((PetscObject)ctx)->comm; 5193 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5194 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5195 PetscScalar *rvalues,*svalues; 5196 MatScalar *b_otha,*bufa,*bufA; 5197 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5198 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5199 MPI_Status *sstatus,rstatus; 5200 PetscMPIInt jj; 5201 PetscInt *cols,sbs,rbs; 5202 PetscScalar *vals; 5203 5204 PetscFunctionBegin; 5205 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5206 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5207 } 5208 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5209 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5210 5211 gen_to = (VecScatter_MPI_General*)ctx->todata; 5212 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5213 rvalues = gen_from->values; /* holds the length of receiving row */ 5214 svalues = gen_to->values; /* holds the length of sending row */ 5215 nrecvs = gen_from->n; 5216 nsends = gen_to->n; 5217 5218 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5219 srow = gen_to->indices; /* local row index to be sent */ 5220 sstarts = gen_to->starts; 5221 sprocs = gen_to->procs; 5222 sstatus = gen_to->sstatus; 5223 sbs = gen_to->bs; 5224 rstarts = gen_from->starts; 5225 rprocs = gen_from->procs; 5226 rbs = gen_from->bs; 5227 5228 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5229 if (scall == MAT_INITIAL_MATRIX){ 5230 /* i-array */ 5231 /*---------*/ 5232 /* post receives */ 5233 for (i=0; i<nrecvs; i++){ 5234 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5235 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5236 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5237 } 5238 5239 /* pack the outgoing message */ 5240 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5241 sstartsj[0] = 0; rstartsj[0] = 0; 5242 len = 0; /* total length of j or a array to be sent */ 5243 k = 0; 5244 for (i=0; i<nsends; i++){ 5245 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5246 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5247 for (j=0; j<nrows; j++) { 5248 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5249 for (l=0; l<sbs; l++){ 5250 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5251 rowlen[j*sbs+l] = ncols; 5252 len += ncols; 5253 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5254 } 5255 k++; 5256 } 5257 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5258 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5259 } 5260 /* recvs and sends of i-array are completed */ 5261 i = nrecvs; 5262 while (i--) { 5263 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5264 } 5265 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5266 5267 /* allocate buffers for sending j and a arrays */ 5268 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5269 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5270 5271 /* create i-array of B_oth */ 5272 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5273 b_othi[0] = 0; 5274 len = 0; /* total length of j or a array to be received */ 5275 k = 0; 5276 for (i=0; i<nrecvs; i++){ 5277 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5278 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5279 for (j=0; j<nrows; j++) { 5280 b_othi[k+1] = b_othi[k] + rowlen[j]; 5281 len += rowlen[j]; k++; 5282 } 5283 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5284 } 5285 5286 /* allocate space for j and a arrrays of B_oth */ 5287 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5288 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5289 5290 /* j-array */ 5291 /*---------*/ 5292 /* post receives of j-array */ 5293 for (i=0; i<nrecvs; i++){ 5294 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5295 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5296 } 5297 5298 /* pack the outgoing message j-array */ 5299 k = 0; 5300 for (i=0; i<nsends; i++){ 5301 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5302 bufJ = bufj+sstartsj[i]; 5303 for (j=0; j<nrows; j++) { 5304 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5305 for (ll=0; ll<sbs; ll++){ 5306 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5307 for (l=0; l<ncols; l++){ 5308 *bufJ++ = cols[l]; 5309 } 5310 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5311 } 5312 } 5313 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5314 } 5315 5316 /* recvs and sends of j-array are completed */ 5317 i = nrecvs; 5318 while (i--) { 5319 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5320 } 5321 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5322 } else if (scall == MAT_REUSE_MATRIX){ 5323 sstartsj = *startsj_s; 5324 rstartsj = *startsj_r; 5325 bufa = *bufa_ptr; 5326 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5327 b_otha = b_oth->a; 5328 } else { 5329 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5330 } 5331 5332 /* a-array */ 5333 /*---------*/ 5334 /* post receives of a-array */ 5335 for (i=0; i<nrecvs; i++){ 5336 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5337 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5338 } 5339 5340 /* pack the outgoing message a-array */ 5341 k = 0; 5342 for (i=0; i<nsends; i++){ 5343 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5344 bufA = bufa+sstartsj[i]; 5345 for (j=0; j<nrows; j++) { 5346 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5347 for (ll=0; ll<sbs; ll++){ 5348 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5349 for (l=0; l<ncols; l++){ 5350 *bufA++ = vals[l]; 5351 } 5352 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5353 } 5354 } 5355 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5356 } 5357 /* recvs and sends of a-array are completed */ 5358 i = nrecvs; 5359 while (i--) { 5360 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5361 } 5362 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5363 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5364 5365 if (scall == MAT_INITIAL_MATRIX){ 5366 /* put together the new matrix */ 5367 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5368 5369 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5370 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5371 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5372 b_oth->free_a = PETSC_TRUE; 5373 b_oth->free_ij = PETSC_TRUE; 5374 b_oth->nonew = 0; 5375 5376 ierr = PetscFree(bufj);CHKERRQ(ierr); 5377 if (!startsj_s || !bufa_ptr){ 5378 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5379 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5380 } else { 5381 *startsj_s = sstartsj; 5382 *startsj_r = rstartsj; 5383 *bufa_ptr = bufa; 5384 } 5385 } 5386 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5387 PetscFunctionReturn(0); 5388 } 5389 5390 #undef __FUNCT__ 5391 #define __FUNCT__ "MatGetCommunicationStructs" 5392 /*@C 5393 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5394 5395 Not Collective 5396 5397 Input Parameters: 5398 . A - The matrix in mpiaij format 5399 5400 Output Parameter: 5401 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5402 . colmap - A map from global column index to local index into lvec 5403 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5404 5405 Level: developer 5406 5407 @*/ 5408 #if defined (PETSC_USE_CTABLE) 5409 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5410 #else 5411 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5412 #endif 5413 { 5414 Mat_MPIAIJ *a; 5415 5416 PetscFunctionBegin; 5417 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5418 PetscValidPointer(lvec, 2); 5419 PetscValidPointer(colmap, 3); 5420 PetscValidPointer(multScatter, 4); 5421 a = (Mat_MPIAIJ *) A->data; 5422 if (lvec) *lvec = a->lvec; 5423 if (colmap) *colmap = a->colmap; 5424 if (multScatter) *multScatter = a->Mvctx; 5425 PetscFunctionReturn(0); 5426 } 5427 5428 EXTERN_C_BEGIN 5429 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5430 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5431 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5432 EXTERN_C_END 5433 5434 #undef __FUNCT__ 5435 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5436 /* 5437 Computes (B'*A')' since computing B*A directly is untenable 5438 5439 n p p 5440 ( ) ( ) ( ) 5441 m ( A ) * n ( B ) = m ( C ) 5442 ( ) ( ) ( ) 5443 5444 */ 5445 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5446 { 5447 PetscErrorCode ierr; 5448 Mat At,Bt,Ct; 5449 5450 PetscFunctionBegin; 5451 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5452 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5453 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5454 ierr = MatDestroy(&At);CHKERRQ(ierr); 5455 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5456 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5457 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5458 PetscFunctionReturn(0); 5459 } 5460 5461 #undef __FUNCT__ 5462 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5463 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5464 { 5465 PetscErrorCode ierr; 5466 PetscInt m=A->rmap->n,n=B->cmap->n; 5467 Mat Cmat; 5468 5469 PetscFunctionBegin; 5470 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5471 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5472 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5473 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5474 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5475 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5476 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5477 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5478 *C = Cmat; 5479 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5480 PetscFunctionReturn(0); 5481 } 5482 5483 /* ----------------------------------------------------------------*/ 5484 #undef __FUNCT__ 5485 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5486 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5487 { 5488 PetscErrorCode ierr; 5489 5490 PetscFunctionBegin; 5491 if (scall == MAT_INITIAL_MATRIX){ 5492 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5493 } 5494 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5495 PetscFunctionReturn(0); 5496 } 5497 5498 EXTERN_C_BEGIN 5499 #if defined(PETSC_HAVE_MUMPS) 5500 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5501 #endif 5502 #if defined(PETSC_HAVE_PASTIX) 5503 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5504 #endif 5505 #if defined(PETSC_HAVE_SUPERLU_DIST) 5506 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5507 #endif 5508 #if defined(PETSC_HAVE_SPOOLES) 5509 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5510 #endif 5511 EXTERN_C_END 5512 5513 /*MC 5514 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5515 5516 Options Database Keys: 5517 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5518 5519 Level: beginner 5520 5521 .seealso: MatCreateAIJ() 5522 M*/ 5523 5524 EXTERN_C_BEGIN 5525 #undef __FUNCT__ 5526 #define __FUNCT__ "MatCreate_MPIAIJ" 5527 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5528 { 5529 Mat_MPIAIJ *b; 5530 PetscErrorCode ierr; 5531 PetscMPIInt size; 5532 5533 PetscFunctionBegin; 5534 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5535 5536 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5537 B->data = (void*)b; 5538 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5539 B->assembled = PETSC_FALSE; 5540 5541 B->insertmode = NOT_SET_VALUES; 5542 b->size = size; 5543 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5544 5545 /* build cache for off array entries formed */ 5546 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5547 b->donotstash = PETSC_FALSE; 5548 b->colmap = 0; 5549 b->garray = 0; 5550 b->roworiented = PETSC_TRUE; 5551 5552 /* stuff used for matrix vector multiply */ 5553 b->lvec = PETSC_NULL; 5554 b->Mvctx = PETSC_NULL; 5555 5556 /* stuff for MatGetRow() */ 5557 b->rowindices = 0; 5558 b->rowvalues = 0; 5559 b->getrowactive = PETSC_FALSE; 5560 5561 #if defined(PETSC_HAVE_SPOOLES) 5562 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5563 "MatGetFactor_mpiaij_spooles", 5564 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5565 #endif 5566 #if defined(PETSC_HAVE_MUMPS) 5567 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5568 "MatGetFactor_aij_mumps", 5569 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5570 #endif 5571 #if defined(PETSC_HAVE_PASTIX) 5572 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5573 "MatGetFactor_mpiaij_pastix", 5574 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5575 #endif 5576 #if defined(PETSC_HAVE_SUPERLU_DIST) 5577 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5578 "MatGetFactor_mpiaij_superlu_dist", 5579 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5580 #endif 5581 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5582 "MatStoreValues_MPIAIJ", 5583 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5584 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5585 "MatRetrieveValues_MPIAIJ", 5586 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5587 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5588 "MatGetDiagonalBlock_MPIAIJ", 5589 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5590 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5591 "MatIsTranspose_MPIAIJ", 5592 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5593 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5594 "MatMPIAIJSetPreallocation_MPIAIJ", 5595 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5596 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5597 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5598 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5599 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5600 "MatDiagonalScaleLocal_MPIAIJ", 5601 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5602 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5603 "MatConvert_MPIAIJ_MPIAIJPERM", 5604 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5605 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5606 "MatConvert_MPIAIJ_MPIAIJCRL", 5607 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5608 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5609 "MatConvert_MPIAIJ_MPISBAIJ", 5610 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5611 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5612 "MatMatMult_MPIDense_MPIAIJ", 5613 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5614 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5615 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5616 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5617 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5618 "MatMatMultNumeric_MPIDense_MPIAIJ", 5619 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5620 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5621 PetscFunctionReturn(0); 5622 } 5623 EXTERN_C_END 5624 5625 #undef __FUNCT__ 5626 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5627 /*@ 5628 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5629 and "off-diagonal" part of the matrix in CSR format. 5630 5631 Collective on MPI_Comm 5632 5633 Input Parameters: 5634 + comm - MPI communicator 5635 . m - number of local rows (Cannot be PETSC_DECIDE) 5636 . n - This value should be the same as the local size used in creating the 5637 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5638 calculated if N is given) For square matrices n is almost always m. 5639 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5640 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5641 . i - row indices for "diagonal" portion of matrix 5642 . j - column indices 5643 . a - matrix values 5644 . oi - row indices for "off-diagonal" portion of matrix 5645 . oj - column indices 5646 - oa - matrix values 5647 5648 Output Parameter: 5649 . mat - the matrix 5650 5651 Level: advanced 5652 5653 Notes: 5654 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5655 must free the arrays once the matrix has been destroyed and not before. 5656 5657 The i and j indices are 0 based 5658 5659 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5660 5661 This sets local rows and cannot be used to set off-processor values. 5662 5663 You cannot later use MatSetValues() to change values in this matrix. 5664 5665 .keywords: matrix, aij, compressed row, sparse, parallel 5666 5667 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5668 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5669 @*/ 5670 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5671 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5672 { 5673 PetscErrorCode ierr; 5674 Mat_MPIAIJ *maij; 5675 5676 PetscFunctionBegin; 5677 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5678 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5679 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5680 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5681 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5682 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5683 maij = (Mat_MPIAIJ*) (*mat)->data; 5684 maij->donotstash = PETSC_TRUE; 5685 (*mat)->preallocated = PETSC_TRUE; 5686 5687 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5688 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5689 5690 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5691 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5692 5693 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5694 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5695 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5696 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5697 5698 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5699 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5700 PetscFunctionReturn(0); 5701 } 5702 5703 /* 5704 Special version for direct calls from Fortran 5705 */ 5706 #include <petsc-private/fortranimpl.h> 5707 5708 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5709 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5710 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5711 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5712 #endif 5713 5714 /* Change these macros so can be used in void function */ 5715 #undef CHKERRQ 5716 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5717 #undef SETERRQ2 5718 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5719 #undef SETERRQ3 5720 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5721 #undef SETERRQ 5722 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5723 5724 EXTERN_C_BEGIN 5725 #undef __FUNCT__ 5726 #define __FUNCT__ "matsetvaluesmpiaij_" 5727 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5728 { 5729 Mat mat = *mmat; 5730 PetscInt m = *mm, n = *mn; 5731 InsertMode addv = *maddv; 5732 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5733 PetscScalar value; 5734 PetscErrorCode ierr; 5735 5736 MatCheckPreallocated(mat,1); 5737 if (mat->insertmode == NOT_SET_VALUES) { 5738 mat->insertmode = addv; 5739 } 5740 #if defined(PETSC_USE_DEBUG) 5741 else if (mat->insertmode != addv) { 5742 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5743 } 5744 #endif 5745 { 5746 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5747 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5748 PetscBool roworiented = aij->roworiented; 5749 5750 /* Some Variables required in the macro */ 5751 Mat A = aij->A; 5752 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5753 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5754 MatScalar *aa = a->a; 5755 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5756 Mat B = aij->B; 5757 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5758 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5759 MatScalar *ba = b->a; 5760 5761 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5762 PetscInt nonew = a->nonew; 5763 MatScalar *ap1,*ap2; 5764 5765 PetscFunctionBegin; 5766 for (i=0; i<m; i++) { 5767 if (im[i] < 0) continue; 5768 #if defined(PETSC_USE_DEBUG) 5769 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5770 #endif 5771 if (im[i] >= rstart && im[i] < rend) { 5772 row = im[i] - rstart; 5773 lastcol1 = -1; 5774 rp1 = aj + ai[row]; 5775 ap1 = aa + ai[row]; 5776 rmax1 = aimax[row]; 5777 nrow1 = ailen[row]; 5778 low1 = 0; 5779 high1 = nrow1; 5780 lastcol2 = -1; 5781 rp2 = bj + bi[row]; 5782 ap2 = ba + bi[row]; 5783 rmax2 = bimax[row]; 5784 nrow2 = bilen[row]; 5785 low2 = 0; 5786 high2 = nrow2; 5787 5788 for (j=0; j<n; j++) { 5789 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5790 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5791 if (in[j] >= cstart && in[j] < cend){ 5792 col = in[j] - cstart; 5793 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5794 } else if (in[j] < 0) continue; 5795 #if defined(PETSC_USE_DEBUG) 5796 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5797 #endif 5798 else { 5799 if (mat->was_assembled) { 5800 if (!aij->colmap) { 5801 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5802 } 5803 #if defined (PETSC_USE_CTABLE) 5804 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5805 col--; 5806 #else 5807 col = aij->colmap[in[j]] - 1; 5808 #endif 5809 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5810 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5811 col = in[j]; 5812 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5813 B = aij->B; 5814 b = (Mat_SeqAIJ*)B->data; 5815 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5816 rp2 = bj + bi[row]; 5817 ap2 = ba + bi[row]; 5818 rmax2 = bimax[row]; 5819 nrow2 = bilen[row]; 5820 low2 = 0; 5821 high2 = nrow2; 5822 bm = aij->B->rmap->n; 5823 ba = b->a; 5824 } 5825 } else col = in[j]; 5826 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5827 } 5828 } 5829 } else { 5830 if (!aij->donotstash) { 5831 if (roworiented) { 5832 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5833 } else { 5834 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5835 } 5836 } 5837 } 5838 }} 5839 PetscFunctionReturnVoid(); 5840 } 5841 EXTERN_C_END 5842 5843