#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; this type also
   automatically switches over to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray =
aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
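    The scheme, briefly (a summary of the code below, not a change to it): process 0 owns the
    entire SeqAIJ matrix and sends each remote process its row lengths, then its column indices,
    then its numerical values; every process computes diagonal/off-diagonal counts from those,
    preallocates, and inserts its rows with MatSetValues(). With MAT_REUSE_MATRIX only the
    numerical values are re-sent and copied straight into the existing A and B blocks using the
    ld[] counts recorded on the first call.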
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr); 187 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 188 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 189 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 190 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 191 rowners[0] = 0; 192 for (i=2; i<=size; i++) { 193 rowners[i] += rowners[i-1]; 194 } 195 rstart = rowners[rank]; 196 rend = rowners[rank+1]; 197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 198 if (!rank) { 199 gmata = (Mat_SeqAIJ*) gmat->data; 200 /* send row lengths to all processors */ 201 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 202 for (i=1; i<size; i++) { 203 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 204 } 205 /* determine number diagonal and off-diagonal counts */ 206 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 207 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 208 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 209 jj = 0; 210 for (i=0; i<m; i++) { 211 for (j=0; j<dlens[i]; j++) { 212 if (gmata->j[jj] < rstart) ld[i]++; 213 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 214 jj++; 215 } 216 } 217 /* send column indices to other processes */ 218 for (i=1; i<size; i++) { 219 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 220 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 222 } 223 224 /* send numerical values to other processes */ 225 for (i=1; i<size; i++) { 226 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 227 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 228 } 229 gmataa = gmata->a; 230 gmataj = gmata->j; 231 232 } else { 233 /* receive row lengths */ 234 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 235 /* receive column indices */ 236 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 237 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 238 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 239 /* determine number diagonal and off-diagonal counts */ 240 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 241 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 242 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 243 jj = 0; 244 for (i=0; i<m; i++) { 245 for 
(j=0; j<dlens[i]; j++) { 246 if (gmataj[jj] < rstart) ld[i]++; 247 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 248 jj++; 249 } 250 } 251 /* receive numerical values */ 252 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 253 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 254 } 255 /* set preallocation */ 256 for (i=0; i<m; i++) { 257 dlens[i] -= olens[i]; 258 } 259 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 260 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 261 262 for (i=0; i<m; i++) { 263 dlens[i] += olens[i]; 264 } 265 cnt = 0; 266 for (i=0; i<m; i++) { 267 row = rstart + i; 268 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 269 cnt += dlens[i]; 270 } 271 if (rank) { 272 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 273 } 274 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 275 ierr = PetscFree(rowners);CHKERRQ(ierr); 276 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 277 *inmat = mat; 278 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 279 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 280 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 281 mat = *inmat; 282 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 283 if (!rank) { 284 /* send numerical values to other processes */ 285 gmata = (Mat_SeqAIJ*) gmat->data; 286 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 287 gmataa = gmata->a; 288 for (i=1; i<size; i++) { 289 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 290 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 291 } 292 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 293 } else { 294 /* receive numerical values from process 0*/ 295 nz = Ad->nz + Ao->nz; 296 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 297 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 298 } 299 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 300 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 301 ad = Ad->a; 302 ao = Ao->a; 303 if (mat->rmap->n) { 304 i = 0; 305 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 306 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 307 } 308 for (i=1; i<mat->rmap->n; i++) { 309 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 310 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 311 } 312 i--; 313 if (mat->rmap->n) { 314 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 315 } 316 if (rank) { 317 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 318 } 319 } 320 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 322 CHKMEMQ; 323 PetscFunctionReturn(0); 324 } 325 326 /* 327 Local utility routine that creates a mapping from the global column 328 number to the local number in the off-diagonal part of the local 329 storage of the matrix. 
When PETSC_USE_CTABLE is used this is scalable at 330 a slightly higher hash table cost; without it it is not scalable (each processor 331 has an order N integer array but is fast to acess. 332 */ 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 336 { 337 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 338 PetscErrorCode ierr; 339 PetscInt n = aij->B->cmap->n,i; 340 341 PetscFunctionBegin; 342 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 343 #if defined (PETSC_USE_CTABLE) 344 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 345 for (i=0; i<n; i++){ 346 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 347 } 348 #else 349 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 350 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 351 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 352 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 353 #endif 354 PetscFunctionReturn(0); 355 } 356 357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 358 { \ 359 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 360 lastcol1 = col;\ 361 while (high1-low1 > 5) { \ 362 t = (low1+high1)/2; \ 363 if (rp1[t] > col) high1 = t; \ 364 else low1 = t; \ 365 } \ 366 for (_i=low1; _i<high1; _i++) { \ 367 if (rp1[_i] > col) break; \ 368 if (rp1[_i] == col) { \ 369 if (addv == ADD_VALUES) ap1[_i] += value; \ 370 else ap1[_i] = value; \ 371 goto a_noinsert; \ 372 } \ 373 } \ 374 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 375 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 376 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 377 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 378 N = nrow1++ - 1; a->nz++; high1++; \ 379 /* shift up all the later entries in this row */ \ 380 for (ii=N; ii>=_i; ii--) { \ 381 rp1[ii+1] = rp1[ii]; \ 382 ap1[ii+1] = ap1[ii]; \ 383 } \ 384 rp1[_i] = col; \ 385 ap1[_i] = value; \ 386 a_noinsert: ; \ 387 ailen[row] = nrow1; \ 388 } 389 390 391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 392 { \ 393 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 394 lastcol2 = col;\ 395 while (high2-low2 > 5) { \ 396 t = (low2+high2)/2; \ 397 if (rp2[t] > col) high2 = t; \ 398 else low2 = t; \ 399 } \ 400 for (_i=low2; _i<high2; _i++) { \ 401 if (rp2[_i] > col) break; \ 402 if (rp2[_i] == col) { \ 403 if (addv == ADD_VALUES) ap2[_i] += value; \ 404 else ap2[_i] = value; \ 405 goto b_noinsert; \ 406 } \ 407 } \ 408 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 409 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 410 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 411 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 412 N = nrow2++ - 1; b->nz++; high2++; \ 413 /* shift up all the later entries in this row */ \ 414 for (ii=N; ii>=_i; ii--) { \ 415 rp2[ii+1] = rp2[ii]; \ 416 ap2[ii+1] = ap2[ii]; \ 417 } \ 418 rp2[_i] = col; \ 419 ap2[_i] = value; \ 420 b_noinsert: ; \ 421 bilen[row] = nrow2; \ 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ 
"MatSetValuesRow_MPIAIJ" 426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 427 { 428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 430 PetscErrorCode ierr; 431 PetscInt l,*garray = mat->garray,diag; 432 433 PetscFunctionBegin; 434 /* code only works for square matrices A */ 435 436 /* find size of row to the left of the diagonal part */ 437 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 438 row = row - diag; 439 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 440 if (garray[b->j[b->i[row]+l]] > diag) break; 441 } 442 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 443 444 /* diagonal part */ 445 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 446 447 /* right of diagonal part */ 448 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValues_MPIAIJ" 454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscScalar value; 458 PetscErrorCode ierr; 459 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 460 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 461 PetscBool roworiented = aij->roworiented; 462 463 /* Some Variables required in the macro */ 464 Mat A = aij->A; 465 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 466 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 467 MatScalar *aa = a->a; 468 PetscBool ignorezeroentries = a->ignorezeroentries; 469 Mat B = aij->B; 470 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 471 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 472 MatScalar *ba = b->a; 473 474 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 475 PetscInt nonew; 476 MatScalar *ap1,*ap2; 477 478 PetscFunctionBegin; 479 if (v) PetscValidScalarPointer(v,6); 480 for (i=0; i<m; i++) { 481 if (im[i] < 0) continue; 482 #if defined(PETSC_USE_DEBUG) 483 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 484 #endif 485 if (im[i] >= rstart && im[i] < rend) { 486 row = im[i] - rstart; 487 lastcol1 = -1; 488 rp1 = aj + ai[row]; 489 ap1 = aa + ai[row]; 490 rmax1 = aimax[row]; 491 nrow1 = ailen[row]; 492 low1 = 0; 493 high1 = nrow1; 494 lastcol2 = -1; 495 rp2 = bj + bi[row]; 496 ap2 = ba + bi[row]; 497 rmax2 = bimax[row]; 498 nrow2 = bilen[row]; 499 low2 = 0; 500 high2 = nrow2; 501 502 for (j=0; j<n; j++) { 503 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 504 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 505 if (in[j] >= cstart && in[j] < cend){ 506 col = in[j] - cstart; 507 nonew = a->nonew; 508 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 509 } else if (in[j] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 512 #endif 513 else { 514 if (mat->was_assembled) { 515 if (!aij->colmap) { 516 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 517 } 518 #if defined (PETSC_USE_CTABLE) 519 
ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col--; 521 #else 522 col = aij->colmap[in[j]] - 1; 523 #endif 524 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 528 B = aij->B; 529 b = (Mat_SeqAIJ*)B->data; 530 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 531 rp2 = bj + bi[row]; 532 ap2 = ba + bi[row]; 533 rmax2 = bimax[row]; 534 nrow2 = bilen[row]; 535 low2 = 0; 536 high2 = nrow2; 537 bm = aij->B->rmap->n; 538 ba = b->a; 539 } 540 } else col = in[j]; 541 nonew = b->nonew; 542 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 543 } 544 } 545 } else { 546 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 547 if (!aij->donotstash) { 548 mat->assembled = PETSC_FALSE; 549 if (roworiented) { 550 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatGetValues_MPIAIJ" 562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 563 { 564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 565 PetscErrorCode ierr; 566 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 567 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 568 569 PetscFunctionBegin; 570 for (i=0; i<m; i++) { 571 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 572 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 573 if (idxm[i] >= rstart && idxm[i] < rend) { 574 row = idxm[i] - rstart; 575 for (j=0; j<n; j++) { 576 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 577 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 578 if (idxn[j] >= cstart && idxn[j] < cend){ 579 col = idxn[j] - cstart; 580 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } else { 582 if (!aij->colmap) { 583 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 584 } 585 #if defined (PETSC_USE_CTABLE) 586 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 587 col --; 588 #else 589 col = aij->colmap[idxn[j]] - 1; 590 #endif 591 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 592 else { 593 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 594 } 595 } 596 } 597 } else { 598 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 599 } 600 } 601 PetscFunctionReturn(0); 602 } 603 604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 609 { 610 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 611 PetscErrorCode ierr; 612 PetscInt nstash,reallocs; 613 InsertMode addv; 614 615 
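  /* All processes must agree on the InsertMode (mixing ADD_VALUES and INSERT_VALUES across
     processes is an error); any off-process entries cached in the stash are then scattered
     to their owning processes, to be drained in MatAssemblyEnd_MPIAIJ(). */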
PetscFunctionBegin; 616 if (aij->donotstash || mat->nooffprocentries) { 617 PetscFunctionReturn(0); 618 } 619 620 /* make sure all processors are either in INSERTMODE or ADDMODE */ 621 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 622 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 623 mat->insertmode = addv; /* in case this processor had no cache */ 624 625 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 626 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 627 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 634 { 635 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 636 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 637 PetscErrorCode ierr; 638 PetscMPIInt n; 639 PetscInt i,j,rstart,ncols,flg; 640 PetscInt *row,*col; 641 PetscBool other_disassembled; 642 PetscScalar *val; 643 InsertMode addv = mat->insertmode; 644 645 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 646 PetscFunctionBegin; 647 if (!aij->donotstash && !mat->nooffprocentries) { 648 while (1) { 649 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 650 if (!flg) break; 651 652 for (i=0; i<n;) { 653 /* Now identify the consecutive vals belonging to the same row */ 654 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 655 if (j < n) ncols = j-i; 656 else ncols = n-i; 657 /* Now assemble all these values with a single function call */ 658 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 659 i = j; 660 } 661 } 662 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 663 } 664 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 665 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 666 667 /* determine if any processor has disassembled, if so we must 668 also disassemble ourselfs, in order that we may reassemble. 
*/ 669 /* 670 if nonzero structure of submatrix B cannot change then we know that 671 no processor disassembled thus we can skip this stuff 672 */ 673 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 674 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 675 if (mat->was_assembled && !other_disassembled) { 676 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 677 } 678 } 679 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 680 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 681 } 682 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 683 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 684 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 685 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 686 687 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 688 aij->rowvalues = 0; 689 690 /* used by MatAXPY() */ 691 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 692 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 693 694 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 695 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 702 { 703 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 704 PetscErrorCode ierr; 705 706 PetscFunctionBegin; 707 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 708 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatZeroRows_MPIAIJ" 714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 715 { 716 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 717 PetscErrorCode ierr; 718 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 719 PetscInt i,*owners = A->rmap->range; 720 PetscInt *nprocs,j,idx,nsends,row; 721 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 722 PetscInt *rvalues,count,base,slen,*source; 723 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 724 MPI_Comm comm = ((PetscObject)A)->comm; 725 MPI_Request *send_waits,*recv_waits; 726 MPI_Status recv_status,*send_status; 727 const PetscScalar *xx; 728 PetscScalar *bb; 729 #if defined(PETSC_DEBUG) 730 PetscBool found = PETSC_FALSE; 731 #endif 732 733 PetscFunctionBegin; 734 /* first count number of contributors to each processor */ 735 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 736 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 737 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 738 j = 0; 739 for (i=0; i<N; i++) { 740 if (lastidx > (idx = rows[i])) j = 0; 741 lastidx = idx; 742 for (; j<size; j++) { 743 if (idx >= owners[j] && idx < owners[j+1]) { 744 nprocs[2*j]++; 745 nprocs[2*j+1] = 1; 746 owner[i] = j; 747 #if defined(PETSC_DEBUG) 748 found = PETSC_TRUE; 749 #endif 750 break; 751 } 752 } 753 #if defined(PETSC_DEBUG) 754 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 755 found = PETSC_FALSE; 756 #endif 757 } 758 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 759 760 if (A->nooffproczerorows) { 761 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 762 nrecvs = nsends; 763 nmax = N; 764 } 
else { 765 /* inform other processors of number of messages and max length*/ 766 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 767 } 768 769 /* post receives: */ 770 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 771 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 772 for (i=0; i<nrecvs; i++) { 773 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 774 } 775 776 /* do sends: 777 1) starts[i] gives the starting index in svalues for stuff going to 778 the ith processor 779 */ 780 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 781 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 782 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 783 starts[0] = 0; 784 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 785 for (i=0; i<N; i++) { 786 svalues[starts[owner[i]]++] = rows[i]; 787 } 788 789 starts[0] = 0; 790 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 791 count = 0; 792 for (i=0; i<size; i++) { 793 if (nprocs[2*i+1]) { 794 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 795 } 796 } 797 ierr = PetscFree(starts);CHKERRQ(ierr); 798 799 base = owners[rank]; 800 801 /* wait on receives */ 802 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 803 count = nrecvs; slen = 0; 804 while (count) { 805 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 806 /* unpack receives into our local space */ 807 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 808 source[imdex] = recv_status.MPI_SOURCE; 809 lens[imdex] = n; 810 slen += n; 811 count--; 812 } 813 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 814 815 /* move the data into the send scatter */ 816 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 817 count = 0; 818 for (i=0; i<nrecvs; i++) { 819 values = rvalues + i*nmax; 820 for (j=0; j<lens[i]; j++) { 821 lrows[count++] = values[j] - base; 822 } 823 } 824 ierr = PetscFree(rvalues);CHKERRQ(ierr); 825 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 826 ierr = PetscFree(owner);CHKERRQ(ierr); 827 ierr = PetscFree(nprocs);CHKERRQ(ierr); 828 829 /* fix right hand side if needed */ 830 if (x && b) { 831 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 833 for (i=0; i<slen; i++) { 834 bb[lrows[i]] = diag*xx[lrows[i]]; 835 } 836 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 837 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 838 } 839 /* 840 Zero the required rows. If the "diagonal block" of the matrix 841 is square and the user wishes to set the diagonal we use separate 842 code so that MatSetValues() is not called for each diagonal allocating 843 new memory, thus calling lots of mallocs and slowing things down. 
844 845 */ 846 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 847 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 848 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 849 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 850 } else if (diag != 0.0) { 851 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 852 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 853 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 855 } 856 for (i = 0; i < slen; i++) { 857 row = lrows[i] + rstart; 858 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 859 } 860 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 861 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 862 } else { 863 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 864 } 865 ierr = PetscFree(lrows);CHKERRQ(ierr); 866 867 /* wait on sends */ 868 if (nsends) { 869 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 870 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 871 ierr = PetscFree(send_status);CHKERRQ(ierr); 872 } 873 ierr = PetscFree(send_waits);CHKERRQ(ierr); 874 ierr = PetscFree(svalues);CHKERRQ(ierr); 875 PetscFunctionReturn(0); 876 } 877 878 #undef __FUNCT__ 879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 881 { 882 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 883 PetscErrorCode ierr; 884 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 885 PetscInt i,*owners = A->rmap->range; 886 PetscInt *nprocs,j,idx,nsends; 887 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 888 PetscInt *rvalues,count,base,slen,*source; 889 PetscInt *lens,*lrows,*values,m; 890 MPI_Comm comm = ((PetscObject)A)->comm; 891 MPI_Request *send_waits,*recv_waits; 892 MPI_Status recv_status,*send_status; 893 const PetscScalar *xx; 894 PetscScalar *bb,*mask; 895 Vec xmask,lmask; 896 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 897 const PetscInt *aj, *ii,*ridx; 898 PetscScalar *aa; 899 #if defined(PETSC_DEBUG) 900 PetscBool found = PETSC_FALSE; 901 #endif 902 903 PetscFunctionBegin; 904 /* first count number of contributors to each processor */ 905 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 906 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 907 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 908 j = 0; 909 for (i=0; i<N; i++) { 910 if (lastidx > (idx = rows[i])) j = 0; 911 lastidx = idx; 912 for (; j<size; j++) { 913 if (idx >= owners[j] && idx < owners[j+1]) { 914 nprocs[2*j]++; 915 nprocs[2*j+1] = 1; 916 owner[i] = j; 917 #if defined(PETSC_DEBUG) 918 found = PETSC_TRUE; 919 #endif 920 break; 921 } 922 } 923 #if defined(PETSC_DEBUG) 924 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 925 found = PETSC_FALSE; 926 #endif 927 } 928 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 929 930 /* inform other processors of number of messages and max length*/ 931 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 932 933 /* post receives: */ 934 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 935 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 936 for (i=0; i<nrecvs; i++) { 937 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 938 } 939 940 /* do sends: 941 1) starts[i] gives the starting index in svalues for stuff going to 942 the ith processor 943 */ 944 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 945 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 946 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 947 starts[0] = 0; 948 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 949 for (i=0; i<N; i++) { 950 svalues[starts[owner[i]]++] = rows[i]; 951 } 952 953 starts[0] = 0; 954 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 955 count = 0; 956 for (i=0; i<size; i++) { 957 if (nprocs[2*i+1]) { 958 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 959 } 960 } 961 ierr = PetscFree(starts);CHKERRQ(ierr); 962 963 base = owners[rank]; 964 965 /* wait on receives */ 966 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 967 count = nrecvs; slen = 0; 968 while (count) { 969 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 970 /* unpack receives into our local space */ 971 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 972 source[imdex] = recv_status.MPI_SOURCE; 973 lens[imdex] = n; 974 slen += n; 975 count--; 976 } 977 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 978 979 /* move the data into the send scatter */ 980 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 981 count = 0; 982 for (i=0; i<nrecvs; i++) { 983 values = rvalues + i*nmax; 984 for (j=0; j<lens[i]; j++) { 985 lrows[count++] = values[j] - base; 986 } 987 } 988 ierr = PetscFree(rvalues);CHKERRQ(ierr); 989 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 990 ierr = PetscFree(owner);CHKERRQ(ierr); 991 ierr = PetscFree(nprocs);CHKERRQ(ierr); 992 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 993 994 /* zero diagonal part of matrix */ 995 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 996 997 /* handle off diagonal part of matrix */ 998 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 999 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1000 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1001 for (i=0; i<slen; i++) { 1002 bb[lrows[i]] = 1; 1003 } 1004 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1005 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1008 if (x) { 1009 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1012 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1013 } 1014 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1015 1016 /* remove zeroed rows of off diagonal matrix */ 1017 ii = aij->i; 1018 for (i=0; i<slen; i++) { 1019 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1020 } 1021 1022 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1023 if (aij->compressedrow.use){ 1024 m = aij->compressedrow.nrows; 1025 ii = 
aij->compressedrow.i; 1026 ridx = aij->compressedrow.rindex; 1027 for (i=0; i<m; i++){ 1028 n = ii[i+1] - ii[i]; 1029 aj = aij->j + ii[i]; 1030 aa = aij->a + ii[i]; 1031 1032 for (j=0; j<n; j++) { 1033 if (PetscAbsScalar(mask[*aj])) { 1034 if (b) bb[*ridx] -= *aa*xx[*aj]; 1035 *aa = 0.0; 1036 } 1037 aa++; 1038 aj++; 1039 } 1040 ridx++; 1041 } 1042 } else { /* do not use compressed row format */ 1043 m = l->B->rmap->n; 1044 for (i=0; i<m; i++) { 1045 n = ii[i+1] - ii[i]; 1046 aj = aij->j + ii[i]; 1047 aa = aij->a + ii[i]; 1048 for (j=0; j<n; j++) { 1049 if (PetscAbsScalar(mask[*aj])) { 1050 if (b) bb[i] -= *aa*xx[*aj]; 1051 *aa = 0.0; 1052 } 1053 aa++; 1054 aj++; 1055 } 1056 } 1057 } 1058 if (x) { 1059 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1060 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1061 } 1062 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1063 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1064 ierr = PetscFree(lrows);CHKERRQ(ierr); 1065 1066 /* wait on sends */ 1067 if (nsends) { 1068 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1069 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1070 ierr = PetscFree(send_status);CHKERRQ(ierr); 1071 } 1072 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1073 ierr = PetscFree(svalues);CHKERRQ(ierr); 1074 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatMult_MPIAIJ" 1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1081 { 1082 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1083 PetscErrorCode ierr; 1084 PetscInt nt; 1085 1086 PetscFunctionBegin; 1087 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1088 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1089 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1091 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1092 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1099 { 1100 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1118 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 PetscBool merged; 1130 1131 PetscFunctionBegin; 1132 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1133 /* do nondiagonal part */ 1134 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1135 if (!merged) { 1136 /* 
send it on its way */ 1137 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 /* do local part */ 1139 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1140 /* receive remote parts: note this assumes the values are not actually */ 1141 /* added in yy until the next line, */ 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 } else { 1144 /* do local part */ 1145 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1146 /* send it on its way */ 1147 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148 /* values actually were received in the Begin() but we need to call this nop */ 1149 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1150 } 1151 PetscFunctionReturn(0); 1152 } 1153 1154 EXTERN_C_BEGIN 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1157 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1158 { 1159 MPI_Comm comm; 1160 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1161 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1162 IS Me,Notme; 1163 PetscErrorCode ierr; 1164 PetscInt M,N,first,last,*notme,i; 1165 PetscMPIInt size; 1166 1167 PetscFunctionBegin; 1168 1169 /* Easy test: symmetric diagonal block */ 1170 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1171 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1172 if (!*f) PetscFunctionReturn(0); 1173 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1175 if (size == 1) PetscFunctionReturn(0); 1176 1177 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1178 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1179 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1180 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1181 for (i=0; i<first; i++) notme[i] = i; 1182 for (i=last; i<M; i++) notme[i-last+first] = i; 1183 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1184 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1185 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1186 Aoff = Aoffs[0]; 1187 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1188 Boff = Boffs[0]; 1189 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1190 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1191 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1192 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1193 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1194 ierr = PetscFree(notme);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 EXTERN_C_END 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1202 { 1203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 /* do nondiagonal part */ 1208 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1209 /* send it on its way */ 1210 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1211 /* do local part */ 1212 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1213 /* receive remote parts */ 1214 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 
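
/*
   Notes on the MPIAIJ layout assumed by the mult routines above and the routines below:
   each process stores its rows as two SeqAIJ matrices, A holding the "diagonal" block
   (the columns this process owns) and B holding the off-diagonal block with its columns
   renumbered compactly and mapped back to global numbering through garray. A matrix-vector
   product is then y = A*x_local + B*x_ghost, where the ghost values are gathered into lvec
   by the Mvctx scatter (see MatMult_MPIAIJ()).

   A minimal creation sketch following the recommendation in the MATAIJ manual page at the
   top of this file (the preallocation counts 5 and 2 are placeholders, not a recipe):

       MatCreate(comm,&C);
       MatSetSizes(C,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
       MatSetType(C,MATAIJ);
       MatSeqAIJSetPreallocation(C,5,PETSC_NULL);
       MatMPIAIJSetPreallocation(C,5,PETSC_NULL,2,PETSC_NULL);
       ... MatSetValues(...) ...
       MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
*/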
1217 1218 /* 1219 This only works correctly for square matrices where the subblock A->A is the 1220 diagonal block 1221 */ 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1225 { 1226 PetscErrorCode ierr; 1227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1228 1229 PetscFunctionBegin; 1230 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1231 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1232 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatScale_MPIAIJ" 1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1239 { 1240 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1245 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatDestroy_MPIAIJ" 1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1252 { 1253 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 #if defined(PETSC_USE_LOG) 1258 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1259 #endif 1260 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1261 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1263 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1264 #if defined (PETSC_USE_CTABLE) 1265 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1266 #else 1267 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1268 #endif 1269 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1270 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1271 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1272 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1273 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1274 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1275 1276 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1291 { 1292 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1293 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1294 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1295 PetscErrorCode ierr; 1296 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1297 int 
fd; 1298 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1299 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1300 PetscScalar *column_values; 1301 PetscInt message_count,flowcontrolcount; 1302 FILE *file; 1303 1304 PetscFunctionBegin; 1305 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1306 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1307 nz = A->nz + B->nz; 1308 if (!rank) { 1309 header[0] = MAT_FILE_CLASSID; 1310 header[1] = mat->rmap->N; 1311 header[2] = mat->cmap->N; 1312 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1313 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1314 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1315 /* get largest number of rows any processor has */ 1316 rlen = mat->rmap->n; 1317 range = mat->rmap->range; 1318 for (i=1; i<size; i++) { 1319 rlen = PetscMax(rlen,range[i+1] - range[i]); 1320 } 1321 } else { 1322 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1323 rlen = mat->rmap->n; 1324 } 1325 1326 /* load up the local row counts */ 1327 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1328 for (i=0; i<mat->rmap->n; i++) { 1329 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1330 } 1331 1332 /* store the row lengths to the file */ 1333 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1334 if (!rank) { 1335 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1336 for (i=1; i<size; i++) { 1337 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1338 rlen = range[i+1] - range[i]; 1339 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1340 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1341 } 1342 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1343 } else { 1344 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1345 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1346 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1347 } 1348 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1349 1350 /* load up the local column indices */ 1351 nzmax = nz; /* )th processor needs space a largest processor needs */ 1352 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1353 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1354 cnt = 0; 1355 for (i=0; i<mat->rmap->n; i++) { 1356 for (j=B->i[i]; j<B->i[i+1]; j++) { 1357 if ( (col = garray[B->j[j]]) > cstart) break; 1358 column_indices[cnt++] = col; 1359 } 1360 for (k=A->i[i]; k<A->i[i+1]; k++) { 1361 column_indices[cnt++] = A->j[k] + cstart; 1362 } 1363 for (; j<B->i[i+1]; j++) { 1364 column_indices[cnt++] = garray[B->j[j]]; 1365 } 1366 } 1367 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1368 1369 /* store the column indices to the file */ 1370 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1371 if (!rank) { 1372 MPI_Status status; 1373 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1374 for (i=1; i<size; i++) { 1375 
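      /* flow control: receive each remote process's nonzero count and column indices
         in turn and append them to the binary file */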
ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1376 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1377 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1378 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1379 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1380 } 1381 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1382 } else { 1383 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1384 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1385 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1386 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1387 } 1388 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1389 1390 /* load up the local column values */ 1391 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1392 cnt = 0; 1393 for (i=0; i<mat->rmap->n; i++) { 1394 for (j=B->i[i]; j<B->i[i+1]; j++) { 1395 if ( garray[B->j[j]] > cstart) break; 1396 column_values[cnt++] = B->a[j]; 1397 } 1398 for (k=A->i[i]; k<A->i[i+1]; k++) { 1399 column_values[cnt++] = A->a[k]; 1400 } 1401 for (; j<B->i[i+1]; j++) { 1402 column_values[cnt++] = B->a[j]; 1403 } 1404 } 1405 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1406 1407 /* store the column values to the file */ 1408 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1409 if (!rank) { 1410 MPI_Status status; 1411 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1412 for (i=1; i<size; i++) { 1413 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1414 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1415 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1416 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1417 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1418 } 1419 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1420 } else { 1421 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1422 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1423 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1424 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1425 } 1426 ierr = PetscFree(column_values);CHKERRQ(ierr); 1427 1428 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1429 if (file) { 1430 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1431 } 1432 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1438 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1439 { 1440 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1441 PetscErrorCode ierr; 1442 PetscMPIInt rank = aij->rank,size = aij->size; 1443 PetscBool isdraw,iascii,isbinary; 1444 PetscViewer sviewer; 1445 PetscViewerFormat format; 1446 
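  /* ASCII info formats print only per-process nonzero statistics; parallel binary output is
     handled by MatView_MPIAIJ_Binary(); the remaining cases (ASCII matrix entries, draw,
     socket) either view aij->A directly on one process or assemble the whole matrix onto
     process 0 below and view it there. */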
1447 PetscFunctionBegin; 1448 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1449 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1450 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1451 if (iascii) { 1452 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1453 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1454 MatInfo info; 1455 PetscBool inodes; 1456 1457 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1458 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1459 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1460 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1461 if (!inodes) { 1462 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1463 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1464 } else { 1465 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1466 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1467 } 1468 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1469 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1470 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1472 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1473 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1474 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1475 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1476 PetscFunctionReturn(0); 1477 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1478 PetscInt inodecount,inodelimit,*inodes; 1479 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1480 if (inodes) { 1481 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1482 } else { 1483 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1484 } 1485 PetscFunctionReturn(0); 1486 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1487 PetscFunctionReturn(0); 1488 } 1489 } else if (isbinary) { 1490 if (size == 1) { 1491 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1492 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1493 } else { 1494 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1495 } 1496 PetscFunctionReturn(0); 1497 } else if (isdraw) { 1498 PetscDraw draw; 1499 PetscBool isnull; 1500 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1501 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1502 } 1503 1504 if (size == 1) { 1505 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1506 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1507 } else { 1508 /* assemble the entire matrix onto first processor. 
*/ 1509 Mat A; 1510 Mat_SeqAIJ *Aloc; 1511 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1512 MatScalar *a; 1513 1514 if (mat->rmap->N > 1024) { 1515 PetscBool flg = PETSC_FALSE; 1516 1517 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1518 if (!flg) { 1519 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1520 } 1521 } 1522 1523 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1524 if (!rank) { 1525 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1526 } else { 1527 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1528 } 1529 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1530 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1531 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1532 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1533 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1534 1535 /* copy over the A part */ 1536 Aloc = (Mat_SeqAIJ*)aij->A->data; 1537 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1538 row = mat->rmap->rstart; 1539 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1540 for (i=0; i<m; i++) { 1541 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1542 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1543 } 1544 aj = Aloc->j; 1545 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1546 1547 /* copy over the B part */ 1548 Aloc = (Mat_SeqAIJ*)aij->B->data; 1549 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1550 row = mat->rmap->rstart; 1551 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1552 ct = cols; 1553 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1554 for (i=0; i<m; i++) { 1555 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1556 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1557 } 1558 ierr = PetscFree(ct);CHKERRQ(ierr); 1559 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1560 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1561 /* 1562 Everyone has to call to draw the matrix since the graphics waits are 1563 synchronized across all processors that share the PetscDraw object 1564 */ 1565 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1566 if (!rank) { 1567 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1568 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1569 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1570 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1571 } 1572 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1573 ierr = MatDestroy(&A);CHKERRQ(ierr); 1574 } 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatView_MPIAIJ" 1580 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1581 { 1582 PetscErrorCode ierr; 1583 PetscBool iascii,isdraw,issocket,isbinary; 1584 1585 PetscFunctionBegin; 1586 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1587 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1588 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1589 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1590 if (iascii || isdraw || isbinary || issocket) { 1591 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1592 } else { 1593 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1594 } 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatSOR_MPIAIJ" 1600 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1601 { 1602 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1603 PetscErrorCode ierr; 1604 Vec bb1 = 0; 1605 PetscBool hasop; 1606 1607 PetscFunctionBegin; 1608 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1609 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1610 } 1611 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1618 if (flag & SOR_ZERO_INITIAL_GUESS) { 1619 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1620 its--; 1621 } 1622 1623 while (its--) { 1624 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1626 1627 /* update rhs: bb1 = bb - B*x */ 1628 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1629 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1630 1631 /* local sweep */ 1632 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1633 } 1634 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1635 if (flag & SOR_ZERO_INITIAL_GUESS) { 1636 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1637 its--; 1638 } 1639 while (its--) { 1640 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 1643 /* update rhs: bb1 = bb - B*x */ 1644 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1645 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1646 1647 /* local sweep */ 1648 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1649 } 1650 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1651 if (flag & SOR_ZERO_INITIAL_GUESS) { 1652 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1653 its--; 1654 } 1655 while (its--) { 1656 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1657 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1658 1659 /* update rhs: bb1 = bb - B*x */ 1660 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1661 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1662 1663 /* local sweep */ 1664 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1665 } 1666 } else if (flag & SOR_EISENSTAT) { 1667 Vec xx1; 1668 1669 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1670 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1671 1672 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 if (!mat->diag) { 1675 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1676 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1677 } 1678 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1679 if (hasop) { 1680 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1681 } else { 1682 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1683 } 1684 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1685 1686 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1687 1688 /* local sweep */ 1689 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1690 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1691 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1692 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1693 1694 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatPermute_MPIAIJ" 1700 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1701 { 1702 MPI_Comm comm; 1703 PetscInt first,local_rowsize,local_colsize; 1704 const PetscInt *rows; 1705 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1706 PetscErrorCode ierr; 1707 1708 PetscFunctionBegin; 1709 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1710 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1711 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1712 /* collect the global row permutation and invert it */ 1713 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1714 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1715 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1716 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1717 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1718 /* get the local target indices */ 1719 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1720 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1721 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1722 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1723 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1724 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1725 /* the column permutation is so much easier; 1726 make a local version of 'colp' and invert it */ 1727 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1728 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1729 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1730 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1731 /* now we just get the submatrix */ 1732 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1733 /* clean up */ 1734 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1735 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1736 PetscFunctionReturn(0); 1737 } 1738 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1742 { 1743 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1744 Mat A = mat->A,B = 
mat->B; 1745 PetscErrorCode ierr; 1746 PetscReal isend[5],irecv[5]; 1747 1748 PetscFunctionBegin; 1749 info->block_size = 1.0; 1750 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1751 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1752 isend[3] = info->memory; isend[4] = info->mallocs; 1753 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1754 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1755 isend[3] += info->memory; isend[4] += info->mallocs; 1756 if (flag == MAT_LOCAL) { 1757 info->nz_used = isend[0]; 1758 info->nz_allocated = isend[1]; 1759 info->nz_unneeded = isend[2]; 1760 info->memory = isend[3]; 1761 info->mallocs = isend[4]; 1762 } else if (flag == MAT_GLOBAL_MAX) { 1763 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1764 info->nz_used = irecv[0]; 1765 info->nz_allocated = irecv[1]; 1766 info->nz_unneeded = irecv[2]; 1767 info->memory = irecv[3]; 1768 info->mallocs = irecv[4]; 1769 } else if (flag == MAT_GLOBAL_SUM) { 1770 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1771 info->nz_used = irecv[0]; 1772 info->nz_allocated = irecv[1]; 1773 info->nz_unneeded = irecv[2]; 1774 info->memory = irecv[3]; 1775 info->mallocs = irecv[4]; 1776 } 1777 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1778 info->fill_ratio_needed = 0; 1779 info->factor_mallocs = 0; 1780 1781 PetscFunctionReturn(0); 1782 } 1783 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "MatSetOption_MPIAIJ" 1786 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1787 { 1788 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1789 PetscErrorCode ierr; 1790 1791 PetscFunctionBegin; 1792 switch (op) { 1793 case MAT_NEW_NONZERO_LOCATIONS: 1794 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1795 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1796 case MAT_KEEP_NONZERO_PATTERN: 1797 case MAT_NEW_NONZERO_LOCATION_ERR: 1798 case MAT_USE_INODES: 1799 case MAT_IGNORE_ZERO_ENTRIES: 1800 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1801 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1802 break; 1803 case MAT_ROW_ORIENTED: 1804 a->roworiented = flg; 1805 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1806 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1807 break; 1808 case MAT_NEW_DIAGONALS: 1809 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1810 break; 1811 case MAT_IGNORE_OFF_PROC_ENTRIES: 1812 a->donotstash = flg; 1813 break; 1814 case MAT_SPD: 1815 A->spd_set = PETSC_TRUE; 1816 A->spd = flg; 1817 if (flg) { 1818 A->symmetric = PETSC_TRUE; 1819 A->structurally_symmetric = PETSC_TRUE; 1820 A->symmetric_set = PETSC_TRUE; 1821 A->structurally_symmetric_set = PETSC_TRUE; 1822 } 1823 break; 1824 case MAT_SYMMETRIC: 1825 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1826 break; 1827 case MAT_STRUCTURALLY_SYMMETRIC: 1828 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1829 break; 1830 case MAT_HERMITIAN: 1831 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1832 break; 1833 case MAT_SYMMETRY_ETERNAL: 1834 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1835 break; 1836 default: 1837 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1838 } 1839 PetscFunctionReturn(0); 1840 } 1841 1842 #undef __FUNCT__ 1843 #define __FUNCT__ "MatGetRow_MPIAIJ" 1844 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1845 { 1846 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1847 
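  /* Added note (not in the original source): the requested row must be locally owned
     (rstart <= row < rend); the result merges the row of the diagonal block A with the row
     of the off-diagonal block B, translating B's compressed column indices to global
     numbering through mat->garray.  A hedged caller-side sketch:

        ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
  */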
PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1848 PetscErrorCode ierr; 1849 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1850 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1851 PetscInt *cmap,*idx_p; 1852 1853 PetscFunctionBegin; 1854 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1855 mat->getrowactive = PETSC_TRUE; 1856 1857 if (!mat->rowvalues && (idx || v)) { 1858 /* 1859 allocate enough space to hold information from the longest row. 1860 */ 1861 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1862 PetscInt max = 1,tmp; 1863 for (i=0; i<matin->rmap->n; i++) { 1864 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1865 if (max < tmp) { max = tmp; } 1866 } 1867 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1868 } 1869 1870 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1871 lrow = row - rstart; 1872 1873 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1874 if (!v) {pvA = 0; pvB = 0;} 1875 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1876 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1877 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1878 nztot = nzA + nzB; 1879 1880 cmap = mat->garray; 1881 if (v || idx) { 1882 if (nztot) { 1883 /* Sort by increasing column numbers, assuming A and B already sorted */ 1884 PetscInt imark = -1; 1885 if (v) { 1886 *v = v_p = mat->rowvalues; 1887 for (i=0; i<nzB; i++) { 1888 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1889 else break; 1890 } 1891 imark = i; 1892 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1893 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1894 } 1895 if (idx) { 1896 *idx = idx_p = mat->rowindices; 1897 if (imark > -1) { 1898 for (i=0; i<imark; i++) { 1899 idx_p[i] = cmap[cworkB[i]]; 1900 } 1901 } else { 1902 for (i=0; i<nzB; i++) { 1903 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1904 else break; 1905 } 1906 imark = i; 1907 } 1908 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1909 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1910 } 1911 } else { 1912 if (idx) *idx = 0; 1913 if (v) *v = 0; 1914 } 1915 } 1916 *nz = nztot; 1917 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1918 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1924 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1925 { 1926 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1927 1928 PetscFunctionBegin; 1929 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1930 aij->getrowactive = PETSC_FALSE; 1931 PetscFunctionReturn(0); 1932 } 1933 1934 #undef __FUNCT__ 1935 #define __FUNCT__ "MatNorm_MPIAIJ" 1936 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1937 { 1938 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1939 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1940 PetscErrorCode ierr; 1941 PetscInt i,j,cstart = mat->cmap->rstart; 1942 PetscReal sum = 0.0; 1943 MatScalar *v; 1944 1945 PetscFunctionBegin; 1946 if (aij->size == 1) { 1947 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1948 } else { 1949 if (type == 
NORM_FROBENIUS) { 1950 v = amat->a; 1951 for (i=0; i<amat->nz; i++) { 1952 #if defined(PETSC_USE_COMPLEX) 1953 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1954 #else 1955 sum += (*v)*(*v); v++; 1956 #endif 1957 } 1958 v = bmat->a; 1959 for (i=0; i<bmat->nz; i++) { 1960 #if defined(PETSC_USE_COMPLEX) 1961 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1962 #else 1963 sum += (*v)*(*v); v++; 1964 #endif 1965 } 1966 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1967 *norm = PetscSqrtReal(*norm); 1968 } else if (type == NORM_1) { /* max column norm */ 1969 PetscReal *tmp,*tmp2; 1970 PetscInt *jj,*garray = aij->garray; 1971 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1972 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1973 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1974 *norm = 0.0; 1975 v = amat->a; jj = amat->j; 1976 for (j=0; j<amat->nz; j++) { 1977 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1978 } 1979 v = bmat->a; jj = bmat->j; 1980 for (j=0; j<bmat->nz; j++) { 1981 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1982 } 1983 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1984 for (j=0; j<mat->cmap->N; j++) { 1985 if (tmp2[j] > *norm) *norm = tmp2[j]; 1986 } 1987 ierr = PetscFree(tmp);CHKERRQ(ierr); 1988 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1989 } else if (type == NORM_INFINITY) { /* max row norm */ 1990 PetscReal ntemp = 0.0; 1991 for (j=0; j<aij->A->rmap->n; j++) { 1992 v = amat->a + amat->i[j]; 1993 sum = 0.0; 1994 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1995 sum += PetscAbsScalar(*v); v++; 1996 } 1997 v = bmat->a + bmat->i[j]; 1998 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1999 sum += PetscAbsScalar(*v); v++; 2000 } 2001 if (sum > ntemp) ntemp = sum; 2002 } 2003 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2004 } else { 2005 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2006 } 2007 } 2008 PetscFunctionReturn(0); 2009 } 2010 2011 #undef __FUNCT__ 2012 #define __FUNCT__ "MatTranspose_MPIAIJ" 2013 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2014 { 2015 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2016 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2017 PetscErrorCode ierr; 2018 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2019 PetscInt cstart=A->cmap->rstart,ncol; 2020 Mat B; 2021 MatScalar *array; 2022 2023 PetscFunctionBegin; 2024 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2025 2026 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2027 ai = Aloc->i; aj = Aloc->j; 2028 bi = Bloc->i; bj = Bloc->j; 2029 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2030 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2031 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2032 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2033 for (i=0; i<ai[ma]; i++){ 2034 d_nnz[aj[i]] ++; 2035 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2036 } 2037 2038 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2039 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2040 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); 
CHKERRQ(ierr); 2041 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2042 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2043 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2044 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2045 } else { 2046 B = *matout; 2047 } 2048 2049 /* copy over the A part */ 2050 array = Aloc->a; 2051 row = A->rmap->rstart; 2052 for (i=0; i<ma; i++) { 2053 ncol = ai[i+1]-ai[i]; 2054 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2055 row++; array += ncol; aj += ncol; 2056 } 2057 aj = Aloc->j; 2058 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2059 2060 /* copy over the B part */ 2061 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2062 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2063 array = Bloc->a; 2064 row = A->rmap->rstart; 2065 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2066 cols_tmp = cols; 2067 for (i=0; i<mb; i++) { 2068 ncol = bi[i+1]-bi[i]; 2069 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2070 row++; array += ncol; cols_tmp += ncol; 2071 } 2072 ierr = PetscFree(cols);CHKERRQ(ierr); 2073 2074 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2075 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2076 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2077 *matout = B; 2078 } else { 2079 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2080 } 2081 PetscFunctionReturn(0); 2082 } 2083 2084 #undef __FUNCT__ 2085 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2086 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2087 { 2088 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2089 Mat a = aij->A,b = aij->B; 2090 PetscErrorCode ierr; 2091 PetscInt s1,s2,s3; 2092 2093 PetscFunctionBegin; 2094 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2095 if (rr) { 2096 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2097 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2098 /* Overlap communication with computation. 
*/ 2099 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2100 } 2101 if (ll) { 2102 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2103 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2104 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2105 } 2106 /* scale the diagonal block */ 2107 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2108 2109 if (rr) { 2110 /* Do a scatter end and then right scale the off-diagonal block */ 2111 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2112 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2113 } 2114 2115 PetscFunctionReturn(0); 2116 } 2117 2118 #undef __FUNCT__ 2119 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2120 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2121 { 2122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2123 PetscErrorCode ierr; 2124 2125 PetscFunctionBegin; 2126 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2127 PetscFunctionReturn(0); 2128 } 2129 2130 #undef __FUNCT__ 2131 #define __FUNCT__ "MatEqual_MPIAIJ" 2132 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2133 { 2134 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2135 Mat a,b,c,d; 2136 PetscBool flg; 2137 PetscErrorCode ierr; 2138 2139 PetscFunctionBegin; 2140 a = matA->A; b = matA->B; 2141 c = matB->A; d = matB->B; 2142 2143 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2144 if (flg) { 2145 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2146 } 2147 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatCopy_MPIAIJ" 2153 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2154 { 2155 PetscErrorCode ierr; 2156 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2157 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2158 2159 PetscFunctionBegin; 2160 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2161 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2162 /* because of the column compression in the off-processor part of the matrix a->B, 2163 the number of columns in a->B and b->B may be different, hence we cannot call 2164 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2165 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2166 then copying the submatrices */ 2167 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2168 } else { 2169 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2170 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2171 } 2172 PetscFunctionReturn(0); 2173 } 2174 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "MatSetUp_MPIAIJ" 2177 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2178 { 2179 PetscErrorCode ierr; 2180 2181 PetscFunctionBegin; 2182 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2183 PetscFunctionReturn(0); 2184 } 2185 2186 #undef __FUNCT__ 2187 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2188 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2189 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2190 { 2191 PetscInt i,m=Y->rmap->N; 2192 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2193 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2194 const PetscInt *xi = x->i,*yi = y->i; 2195 2196 PetscFunctionBegin; 2197 /* Set the number of nonzeros in the new matrix */ 2198 for(i=0; i<m; i++) { 2199 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2200 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2201 nnz[i] = 0; 2202 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2203 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2204 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2205 nnz[i]++; 2206 } 2207 for (; k<nzy; k++) nnz[i]++; 2208 } 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatAXPY_MPIAIJ" 2214 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2215 { 2216 PetscErrorCode ierr; 2217 PetscInt i; 2218 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2219 PetscBLASInt bnz,one=1; 2220 Mat_SeqAIJ *x,*y; 2221 2222 PetscFunctionBegin; 2223 if (str == SAME_NONZERO_PATTERN) { 2224 PetscScalar alpha = a; 2225 x = (Mat_SeqAIJ *)xx->A->data; 2226 y = (Mat_SeqAIJ *)yy->A->data; 2227 bnz = PetscBLASIntCast(x->nz); 2228 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2229 x = (Mat_SeqAIJ *)xx->B->data; 2230 y = (Mat_SeqAIJ *)yy->B->data; 2231 bnz = PetscBLASIntCast(x->nz); 2232 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2233 } else if (str == SUBSET_NONZERO_PATTERN) { 2234 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2235 2236 x = (Mat_SeqAIJ *)xx->B->data; 2237 y = (Mat_SeqAIJ *)yy->B->data; 2238 if (y->xtoy && y->XtoY != xx->B) { 2239 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2240 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2241 } 2242 if (!y->xtoy) { /* get xtoy */ 2243 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2244 y->XtoY = xx->B; 2245 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2246 } 2247 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2248 } else { 2249 Mat B; 2250 PetscInt *nnz_d,*nnz_o; 2251 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2252 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2253 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2254 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2255 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2256 ierr = 
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2257 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2258 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2259 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2260 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2261 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2262 ierr = MatHeaderReplace(Y,B); 2263 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2264 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2265 } 2266 PetscFunctionReturn(0); 2267 } 2268 2269 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2270 2271 #undef __FUNCT__ 2272 #define __FUNCT__ "MatConjugate_MPIAIJ" 2273 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2274 { 2275 #if defined(PETSC_USE_COMPLEX) 2276 PetscErrorCode ierr; 2277 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2278 2279 PetscFunctionBegin; 2280 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2281 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2282 #else 2283 PetscFunctionBegin; 2284 #endif 2285 PetscFunctionReturn(0); 2286 } 2287 2288 #undef __FUNCT__ 2289 #define __FUNCT__ "MatRealPart_MPIAIJ" 2290 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2291 { 2292 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2293 PetscErrorCode ierr; 2294 2295 PetscFunctionBegin; 2296 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2297 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2298 PetscFunctionReturn(0); 2299 } 2300 2301 #undef __FUNCT__ 2302 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2303 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2304 { 2305 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2306 PetscErrorCode ierr; 2307 2308 PetscFunctionBegin; 2309 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2310 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2311 PetscFunctionReturn(0); 2312 } 2313 2314 #ifdef PETSC_HAVE_PBGL 2315 2316 #include <boost/parallel/mpi/bsp_process_group.hpp> 2317 #include <boost/graph/distributed/ilu_default_graph.hpp> 2318 #include <boost/graph/distributed/ilu_0_block.hpp> 2319 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2320 #include <boost/graph/distributed/petsc/interface.hpp> 2321 #include <boost/multi_array.hpp> 2322 #include <boost/parallel/distributed_property_map->hpp> 2323 2324 #undef __FUNCT__ 2325 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2326 /* 2327 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2328 */ 2329 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2330 { 2331 namespace petsc = boost::distributed::petsc; 2332 2333 namespace graph_dist = boost::graph::distributed; 2334 using boost::graph::distributed::ilu_default::process_group_type; 2335 using boost::graph::ilu_permuted; 2336 2337 PetscBool row_identity, col_identity; 2338 PetscContainer c; 2339 PetscInt m, n, M, N; 2340 PetscErrorCode ierr; 2341 2342 PetscFunctionBegin; 2343 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2344 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2345 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2346 if (!row_identity || !col_identity) { 2347 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2348 } 2349 2350 process_group_type pg; 2351 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2352 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, 
petsc::matrix_distribution(A, pg)); 2353 lgraph_type& level_graph = *lgraph_p; 2354 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2355 2356 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2357 ilu_permuted(level_graph); 2358 2359 /* put together the new matrix */ 2360 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2361 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2362 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2363 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2364 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2365 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2366 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2367 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2368 2369 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2370 ierr = PetscContainerSetPointer(c, lgraph_p); 2371 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2372 ierr = PetscContainerDestroy(&c); 2373 PetscFunctionReturn(0); 2374 } 2375 2376 #undef __FUNCT__ 2377 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2378 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2379 { 2380 PetscFunctionBegin; 2381 PetscFunctionReturn(0); 2382 } 2383 2384 #undef __FUNCT__ 2385 #define __FUNCT__ "MatSolve_MPIAIJ" 2386 /* 2387 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2388 */ 2389 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2390 { 2391 namespace graph_dist = boost::graph::distributed; 2392 2393 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2394 lgraph_type* lgraph_p; 2395 PetscContainer c; 2396 PetscErrorCode ierr; 2397 2398 PetscFunctionBegin; 2399 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2400 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2401 ierr = VecCopy(b, x);CHKERRQ(ierr); 2402 2403 PetscScalar* array_x; 2404 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2405 PetscInt sx; 2406 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2407 2408 PetscScalar* array_b; 2409 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2410 PetscInt sb; 2411 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2412 2413 lgraph_type& level_graph = *lgraph_p; 2414 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2415 2416 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2417 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2418 ref_x(array_x, boost::extents[num_vertices(graph)]); 2419 2420 typedef boost::iterator_property_map<array_ref_type::iterator, 2421 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2422 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2423 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2424 2425 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2426 2427 PetscFunctionReturn(0); 2428 } 2429 #endif 2430 2431 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2432 PetscInt nzlocal,nsends,nrecvs; 2433 PetscMPIInt *send_rank,*recv_rank; 2434 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2435 PetscScalar *sbuf_a,**rbuf_a; 2436 PetscErrorCode (*Destroy)(Mat); 2437 } Mat_Redundant; 2438 2439 #undef __FUNCT__ 2440 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2441 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2442 { 2443 PetscErrorCode ierr; 2444 Mat_Redundant 
*redund=(Mat_Redundant*)ptr; 2445 PetscInt i; 2446 2447 PetscFunctionBegin; 2448 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2449 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2450 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2451 for (i=0; i<redund->nrecvs; i++){ 2452 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2453 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2454 } 2455 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2456 ierr = PetscFree(redund);CHKERRQ(ierr); 2457 PetscFunctionReturn(0); 2458 } 2459 2460 #undef __FUNCT__ 2461 #define __FUNCT__ "MatDestroy_MatRedundant" 2462 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2463 { 2464 PetscErrorCode ierr; 2465 PetscContainer container; 2466 Mat_Redundant *redund=PETSC_NULL; 2467 2468 PetscFunctionBegin; 2469 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2470 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2471 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2472 A->ops->destroy = redund->Destroy; 2473 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2474 if (A->ops->destroy) { 2475 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2476 } 2477 PetscFunctionReturn(0); 2478 } 2479 2480 #undef __FUNCT__ 2481 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2482 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2483 { 2484 PetscMPIInt rank,size; 2485 MPI_Comm comm=((PetscObject)mat)->comm; 2486 PetscErrorCode ierr; 2487 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2488 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2489 PetscInt *rowrange=mat->rmap->range; 2490 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2491 Mat A=aij->A,B=aij->B,C=*matredundant; 2492 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2493 PetscScalar *sbuf_a; 2494 PetscInt nzlocal=a->nz+b->nz; 2495 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2496 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2497 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2498 MatScalar *aworkA,*aworkB; 2499 PetscScalar *vals; 2500 PetscMPIInt tag1,tag2,tag3,imdex; 2501 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2502 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2503 MPI_Status recv_status,*send_status; 2504 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2505 PetscInt **rbuf_j=PETSC_NULL; 2506 PetscScalar **rbuf_a=PETSC_NULL; 2507 Mat_Redundant *redund=PETSC_NULL; 2508 PetscContainer container; 2509 2510 PetscFunctionBegin; 2511 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2512 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2513 2514 if (reuse == MAT_REUSE_MATRIX) { 2515 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2516 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2517 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2518 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2519 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2520 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2521 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2522 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2523 2524 nsends = redund->nsends; 2525 nrecvs = redund->nrecvs; 2526 send_rank = redund->send_rank; 2527 recv_rank = redund->recv_rank; 2528 sbuf_nz = redund->sbuf_nz; 2529 rbuf_nz = redund->rbuf_nz; 2530 sbuf_j = redund->sbuf_j; 2531 sbuf_a = redund->sbuf_a; 2532 rbuf_j = redund->rbuf_j; 2533 rbuf_a = redund->rbuf_a; 2534 } 2535 2536 if (reuse == MAT_INITIAL_MATRIX){ 2537 PetscMPIInt subrank,subsize; 2538 PetscInt nleftover,np_subcomm; 2539 /* get the destination processors' id send_rank, nsends and nrecvs */ 2540 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2541 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2542 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2543 np_subcomm = size/nsubcomm; 2544 nleftover = size - nsubcomm*np_subcomm; 2545 nsends = 0; nrecvs = 0; 2546 for (i=0; i<size; i++){ /* i=rank*/ 2547 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2548 send_rank[nsends] = i; nsends++; 2549 recv_rank[nrecvs++] = i; 2550 } 2551 } 2552 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2553 i = size-nleftover-1; 2554 j = 0; 2555 while (j < nsubcomm - nleftover){ 2556 send_rank[nsends++] = i; 2557 i--; j++; 2558 } 2559 } 2560 2561 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2562 for (i=0; i<nleftover; i++){ 2563 recv_rank[nrecvs++] = size-nleftover+i; 2564 } 2565 } 2566 2567 /* allocate sbuf_j, sbuf_a */ 2568 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2569 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2570 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2571 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2572 2573 /* copy mat's local entries into the buffers */ 2574 if (reuse == MAT_INITIAL_MATRIX){ 2575 rownz_max = 0; 2576 rptr = sbuf_j; 2577 cols = sbuf_j + rend-rstart + 1; 2578 vals = sbuf_a; 2579 rptr[0] = 0; 2580 for (i=0; i<rend-rstart; i++){ 2581 row = i + rstart; 2582 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2583 ncols = nzA + nzB; 2584 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2585 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2586 /* load the column indices for this row into cols */ 2587 lwrite = 0; 2588 for (l=0; l<nzB; l++) { 2589 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2590 vals[lwrite] = aworkB[l]; 2591 cols[lwrite++] = ctmp; 2592 } 2593 } 2594 for (l=0; l<nzA; l++){ 2595 vals[lwrite] = aworkA[l]; 2596 cols[lwrite++] = cstart + cworkA[l]; 2597 } 2598 for (l=0; l<nzB; l++) { 2599 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2600 vals[lwrite] = aworkB[l]; 2601 cols[lwrite++] = ctmp; 2602 } 2603 } 2604 vals += ncols; 2605 cols += ncols; 2606 rptr[i+1] = rptr[i] + ncols; 2607 if (rownz_max < ncols) rownz_max = ncols; 2608 } 2609 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2610 } else { /* only copy matrix values into sbuf_a */ 2611 rptr = sbuf_j; 2612 vals = sbuf_a; 2613 rptr[0] = 0; 2614 for (i=0; i<rend-rstart; i++){ 2615 row = i + 
rstart; 2616 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2617 ncols = nzA + nzB; 2618 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2619 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2620 lwrite = 0; 2621 for (l=0; l<nzB; l++) { 2622 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2623 } 2624 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2625 for (l=0; l<nzB; l++) { 2626 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2627 } 2628 vals += ncols; 2629 rptr[i+1] = rptr[i] + ncols; 2630 } 2631 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2632 2633 /* send nzlocal to others, and recv other's nzlocal */ 2634 /*--------------------------------------------------*/ 2635 if (reuse == MAT_INITIAL_MATRIX){ 2636 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2637 s_waits2 = s_waits3 + nsends; 2638 s_waits1 = s_waits2 + nsends; 2639 r_waits1 = s_waits1 + nsends; 2640 r_waits2 = r_waits1 + nrecvs; 2641 r_waits3 = r_waits2 + nrecvs; 2642 } else { 2643 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2644 r_waits3 = s_waits3 + nsends; 2645 } 2646 2647 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2648 if (reuse == MAT_INITIAL_MATRIX){ 2649 /* get new tags to keep the communication clean */ 2650 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2651 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2652 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2653 2654 /* post receives of other's nzlocal */ 2655 for (i=0; i<nrecvs; i++){ 2656 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2657 } 2658 /* send nzlocal to others */ 2659 for (i=0; i<nsends; i++){ 2660 sbuf_nz[i] = nzlocal; 2661 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2662 } 2663 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2664 count = nrecvs; 2665 while (count) { 2666 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2667 recv_rank[imdex] = recv_status.MPI_SOURCE; 2668 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2669 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2670 2671 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2672 rbuf_nz[imdex] += i + 2; 2673 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2674 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2675 count--; 2676 } 2677 /* wait on sends of nzlocal */ 2678 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2679 /* send mat->i,j to others, and recv from other's */ 2680 /*------------------------------------------------*/ 2681 for (i=0; i<nsends; i++){ 2682 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2683 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2684 } 2685 /* wait on receives of mat->i,j */ 2686 /*------------------------------*/ 2687 count = nrecvs; 2688 while (count) { 2689 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2690 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2691 count--; 2692 } 2693 /* wait on sends of mat->i,j */ 2694 /*---------------------------*/ 2695 if (nsends) { 2696 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2697 } 2698 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2699 2700 /* post receives, send and receive mat->a */ 2701 /*----------------------------------------*/ 2702 for (imdex=0; imdex<nrecvs; imdex++) { 2703 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2704 } 2705 for (i=0; i<nsends; i++){ 2706 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2707 } 2708 count = nrecvs; 2709 while (count) { 2710 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2711 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2712 count--; 2713 } 2714 if (nsends) { 2715 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2716 } 2717 2718 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2719 2720 /* create redundant matrix */ 2721 /*-------------------------*/ 2722 if (reuse == MAT_INITIAL_MATRIX){ 2723 /* compute rownz_max for preallocation */ 2724 for (imdex=0; imdex<nrecvs; imdex++){ 2725 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2726 rptr = rbuf_j[imdex]; 2727 for (i=0; i<j; i++){ 2728 ncols = rptr[i+1] - rptr[i]; 2729 if (rownz_max < ncols) rownz_max = ncols; 2730 } 2731 } 2732 2733 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2734 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2735 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2736 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2737 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2738 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2739 } else { 2740 C = *matredundant; 2741 } 2742 2743 /* insert local matrix entries */ 2744 rptr = sbuf_j; 2745 cols = sbuf_j + rend-rstart + 1; 2746 vals = sbuf_a; 2747 for (i=0; i<rend-rstart; i++){ 2748 row = i + rstart; 2749 ncols = rptr[i+1] - rptr[i]; 2750 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2751 vals += ncols; 2752 cols += ncols; 2753 } 2754 /* insert received matrix entries */ 2755 for (imdex=0; imdex<nrecvs; imdex++){ 2756 rstart = rowrange[recv_rank[imdex]]; 2757 rend = rowrange[recv_rank[imdex]+1]; 2758 rptr = rbuf_j[imdex]; 2759 cols = rbuf_j[imdex] + rend-rstart + 1; 2760 vals = rbuf_a[imdex]; 2761 for (i=0; i<rend-rstart; i++){ 2762 row = i + rstart; 2763 ncols = rptr[i+1] - rptr[i]; 2764 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2765 vals += ncols; 2766 cols += ncols; 2767 } 2768 } 2769 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2770 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2771 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2772 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2773 if (reuse == MAT_INITIAL_MATRIX) { 2774 PetscContainer container; 2775 *matredundant = C; 2776 /* create a supporting struct and attach it to C for reuse */ 2777 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2778 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
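      /* Added note (not in the original source): the Mat_Redundant struct holding the send/receive
         ranks and communication buffers is attached to C through this PetscContainer so that a later
         call with MAT_REUSE_MATRIX can recover it; MatDestroy_MatRedundant() is installed below so the
         buffers are freed when C itself is destroyed. */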
2779 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2780 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2781 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2782 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2783 2784 redund->nzlocal = nzlocal; 2785 redund->nsends = nsends; 2786 redund->nrecvs = nrecvs; 2787 redund->send_rank = send_rank; 2788 redund->recv_rank = recv_rank; 2789 redund->sbuf_nz = sbuf_nz; 2790 redund->rbuf_nz = rbuf_nz; 2791 redund->sbuf_j = sbuf_j; 2792 redund->sbuf_a = sbuf_a; 2793 redund->rbuf_j = rbuf_j; 2794 redund->rbuf_a = rbuf_a; 2795 2796 redund->Destroy = C->ops->destroy; 2797 C->ops->destroy = MatDestroy_MatRedundant; 2798 } 2799 PetscFunctionReturn(0); 2800 } 2801 2802 #undef __FUNCT__ 2803 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2804 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2805 { 2806 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2807 PetscErrorCode ierr; 2808 PetscInt i,*idxb = 0; 2809 PetscScalar *va,*vb; 2810 Vec vtmp; 2811 2812 PetscFunctionBegin; 2813 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2814 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2815 if (idx) { 2816 for (i=0; i<A->rmap->n; i++) { 2817 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2818 } 2819 } 2820 2821 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2822 if (idx) { 2823 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2824 } 2825 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2826 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2827 2828 for (i=0; i<A->rmap->n; i++){ 2829 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2830 va[i] = vb[i]; 2831 if (idx) idx[i] = a->garray[idxb[i]]; 2832 } 2833 } 2834 2835 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2836 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2837 ierr = PetscFree(idxb);CHKERRQ(ierr); 2838 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2839 PetscFunctionReturn(0); 2840 } 2841 2842 #undef __FUNCT__ 2843 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2844 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2845 { 2846 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2847 PetscErrorCode ierr; 2848 PetscInt i,*idxb = 0; 2849 PetscScalar *va,*vb; 2850 Vec vtmp; 2851 2852 PetscFunctionBegin; 2853 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2854 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2855 if (idx) { 2856 for (i=0; i<A->cmap->n; i++) { 2857 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2858 } 2859 } 2860 2861 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2862 if (idx) { 2863 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2864 } 2865 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2866 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2867 2868 for (i=0; i<A->rmap->n; i++){ 2869 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2870 va[i] = vb[i]; 2871 if (idx) idx[i] = a->garray[idxb[i]]; 2872 } 2873 } 2874 2875 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2876 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2877 ierr = PetscFree(idxb);CHKERRQ(ierr); 2878 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2879 PetscFunctionReturn(0); 2880 } 2881 2882 #undef __FUNCT__ 2883 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2884 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2885 { 2886 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2887 PetscInt n = A->rmap->n; 2888 PetscInt cstart = 
A->cmap->rstart; 2889 PetscInt *cmap = mat->garray; 2890 PetscInt *diagIdx, *offdiagIdx; 2891 Vec diagV, offdiagV; 2892 PetscScalar *a, *diagA, *offdiagA; 2893 PetscInt r; 2894 PetscErrorCode ierr; 2895 2896 PetscFunctionBegin; 2897 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2898 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2899 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2900 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2901 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2902 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2903 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2904 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2905 for(r = 0; r < n; ++r) { 2906 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2907 a[r] = diagA[r]; 2908 idx[r] = cstart + diagIdx[r]; 2909 } else { 2910 a[r] = offdiagA[r]; 2911 idx[r] = cmap[offdiagIdx[r]]; 2912 } 2913 } 2914 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2915 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2916 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2917 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2918 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2919 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2920 PetscFunctionReturn(0); 2921 } 2922 2923 #undef __FUNCT__ 2924 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2925 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2926 { 2927 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2928 PetscInt n = A->rmap->n; 2929 PetscInt cstart = A->cmap->rstart; 2930 PetscInt *cmap = mat->garray; 2931 PetscInt *diagIdx, *offdiagIdx; 2932 Vec diagV, offdiagV; 2933 PetscScalar *a, *diagA, *offdiagA; 2934 PetscInt r; 2935 PetscErrorCode ierr; 2936 2937 PetscFunctionBegin; 2938 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2939 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2940 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2941 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2942 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2943 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2944 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2945 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2946 for(r = 0; r < n; ++r) { 2947 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2948 a[r] = diagA[r]; 2949 idx[r] = cstart + diagIdx[r]; 2950 } else { 2951 a[r] = offdiagA[r]; 2952 idx[r] = cmap[offdiagIdx[r]]; 2953 } 2954 } 2955 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2956 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2957 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2958 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2959 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2960 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2961 PetscFunctionReturn(0); 2962 } 2963 2964 #undef __FUNCT__ 2965 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2966 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2967 { 2968 PetscErrorCode ierr; 2969 Mat *dummy; 2970 2971 PetscFunctionBegin; 2972 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2973 *newmat = *dummy; 2974 ierr = PetscFree(dummy);CHKERRQ(ierr); 2975 PetscFunctionReturn(0); 2976 } 2977 2978 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2979 2980 #undef __FUNCT__ 
2981 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2982 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2983 { 2984 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2985 PetscErrorCode ierr; 2986 2987 PetscFunctionBegin; 2988 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2989 PetscFunctionReturn(0); 2990 } 2991 2992 2993 /* -------------------------------------------------------------------*/ 2994 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2995 MatGetRow_MPIAIJ, 2996 MatRestoreRow_MPIAIJ, 2997 MatMult_MPIAIJ, 2998 /* 4*/ MatMultAdd_MPIAIJ, 2999 MatMultTranspose_MPIAIJ, 3000 MatMultTransposeAdd_MPIAIJ, 3001 #ifdef PETSC_HAVE_PBGL 3002 MatSolve_MPIAIJ, 3003 #else 3004 0, 3005 #endif 3006 0, 3007 0, 3008 /*10*/ 0, 3009 0, 3010 0, 3011 MatSOR_MPIAIJ, 3012 MatTranspose_MPIAIJ, 3013 /*15*/ MatGetInfo_MPIAIJ, 3014 MatEqual_MPIAIJ, 3015 MatGetDiagonal_MPIAIJ, 3016 MatDiagonalScale_MPIAIJ, 3017 MatNorm_MPIAIJ, 3018 /*20*/ MatAssemblyBegin_MPIAIJ, 3019 MatAssemblyEnd_MPIAIJ, 3020 MatSetOption_MPIAIJ, 3021 MatZeroEntries_MPIAIJ, 3022 /*24*/ MatZeroRows_MPIAIJ, 3023 0, 3024 #ifdef PETSC_HAVE_PBGL 3025 0, 3026 #else 3027 0, 3028 #endif 3029 0, 3030 0, 3031 /*29*/ MatSetUp_MPIAIJ, 3032 #ifdef PETSC_HAVE_PBGL 3033 0, 3034 #else 3035 0, 3036 #endif 3037 0, 3038 0, 3039 0, 3040 /*34*/ MatDuplicate_MPIAIJ, 3041 0, 3042 0, 3043 0, 3044 0, 3045 /*39*/ MatAXPY_MPIAIJ, 3046 MatGetSubMatrices_MPIAIJ, 3047 MatIncreaseOverlap_MPIAIJ, 3048 MatGetValues_MPIAIJ, 3049 MatCopy_MPIAIJ, 3050 /*44*/ MatGetRowMax_MPIAIJ, 3051 MatScale_MPIAIJ, 3052 0, 3053 0, 3054 MatZeroRowsColumns_MPIAIJ, 3055 /*49*/ 0, 3056 0, 3057 0, 3058 0, 3059 0, 3060 /*54*/ MatFDColoringCreate_MPIAIJ, 3061 0, 3062 MatSetUnfactored_MPIAIJ, 3063 0, /* MatPermute_MPIAIJ, impl currently broken */ 3064 0, 3065 /*59*/ MatGetSubMatrix_MPIAIJ, 3066 MatDestroy_MPIAIJ, 3067 MatView_MPIAIJ, 3068 0, 3069 0, 3070 /*64*/ 0, 3071 0, 3072 0, 3073 0, 3074 0, 3075 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3076 MatGetRowMinAbs_MPIAIJ, 3077 0, 3078 MatSetColoring_MPIAIJ, 3079 #if defined(PETSC_HAVE_ADIC) 3080 MatSetValuesAdic_MPIAIJ, 3081 #else 3082 0, 3083 #endif 3084 MatSetValuesAdifor_MPIAIJ, 3085 /*75*/ MatFDColoringApply_AIJ, 3086 0, 3087 0, 3088 0, 3089 0, 3090 /*80*/ 0, 3091 0, 3092 0, 3093 /*83*/ MatLoad_MPIAIJ, 3094 0, 3095 0, 3096 0, 3097 0, 3098 0, 3099 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3100 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3101 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3102 MatPtAP_Basic, 3103 MatPtAPSymbolic_MPIAIJ, 3104 /*94*/ MatPtAPNumeric_MPIAIJ, 3105 0, 3106 0, 3107 0, 3108 0, 3109 /*99*/ 0, 3110 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3111 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3112 MatConjugate_MPIAIJ, 3113 0, 3114 /*104*/MatSetValuesRow_MPIAIJ, 3115 MatRealPart_MPIAIJ, 3116 MatImaginaryPart_MPIAIJ, 3117 0, 3118 0, 3119 /*109*/0, 3120 MatGetRedundantMatrix_MPIAIJ, 3121 MatGetRowMin_MPIAIJ, 3122 0, 3123 0, 3124 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3125 0, 3126 0, 3127 0, 3128 0, 3129 /*119*/0, 3130 0, 3131 0, 3132 0, 3133 MatGetMultiProcBlock_MPIAIJ, 3134 /*124*/MatFindNonZeroRows_MPIAIJ, 3135 MatGetColumnNorms_MPIAIJ, 3136 MatInvertBlockDiagonal_MPIAIJ, 3137 0, 3138 MatGetSubMatricesParallel_MPIAIJ, 3139 /*129*/0, 3140 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3141 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3142 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3143 0, 3144 /*134*/0, 3145 0, 3146 0, 3147 0, 3148 0 3149 }; 3150 3151 /* ----------------------------------------------------------------------------------------*/ 3152 3153 
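/*
   Note: the table above plugs the MPIAIJ-specific routines into the generic Mat dispatch
   mechanism. A user-level call such as MatMult(A,x,y) on a MATMPIAIJ matrix goes through
   A->ops->mult and therefore executes MatMult_MPIAIJ(); similarly MatGetRowMaxAbs() reaches
   MatGetRowMaxAbs_MPIAIJ() through the slot marked 69 above. Entries set to 0 are operations
   this matrix type does not implement, and the generic interface reports that the operation
   is not supported if one of them is requested.
*/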
EXTERN_C_BEGIN 3154 #undef __FUNCT__ 3155 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3156 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3157 { 3158 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3159 PetscErrorCode ierr; 3160 3161 PetscFunctionBegin; 3162 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3163 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3164 PetscFunctionReturn(0); 3165 } 3166 EXTERN_C_END 3167 3168 EXTERN_C_BEGIN 3169 #undef __FUNCT__ 3170 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3171 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3172 { 3173 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3174 PetscErrorCode ierr; 3175 3176 PetscFunctionBegin; 3177 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3178 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3179 PetscFunctionReturn(0); 3180 } 3181 EXTERN_C_END 3182 3183 EXTERN_C_BEGIN 3184 #undef __FUNCT__ 3185 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3186 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3187 { 3188 Mat_MPIAIJ *b; 3189 PetscErrorCode ierr; 3190 PetscInt i; 3191 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3192 3193 PetscFunctionBegin; 3194 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3195 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3196 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3197 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3198 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3199 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3200 3201 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3202 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3203 if (d_nnz) { 3204 for (i=0; i<B->rmap->n; i++) { 3205 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3206 } 3207 } 3208 if (o_nnz) { 3209 for (i=0; i<B->rmap->n; i++) { 3210 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3211 } 3212 } 3213 b = (Mat_MPIAIJ*)B->data; 3214 3215 if (!B->preallocated) { 3216 /* Explicitly create 2 MATSEQAIJ matrices. */ 3217 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3218 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3219 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3220 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3221 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3222 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3223 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3224 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3225 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3226 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3227 } 3228 3229 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3230 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3231 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3232 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3233 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3234 B->preallocated = PETSC_TRUE; 3235 PetscFunctionReturn(0); 3236 } 3237 EXTERN_C_END 3238 3239 #undef __FUNCT__ 3240 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3241 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3242 { 3243 Mat mat; 3244 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3245 PetscErrorCode ierr; 3246 3247 PetscFunctionBegin; 3248 *newmat = 0; 3249 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3250 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3251 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3252 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3253 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3254 a = (Mat_MPIAIJ*)mat->data; 3255 3256 mat->factortype = matin->factortype; 3257 mat->rmap->bs = matin->rmap->bs; 3258 mat->cmap->bs = matin->cmap->bs; 3259 mat->assembled = PETSC_TRUE; 3260 mat->insertmode = NOT_SET_VALUES; 3261 mat->preallocated = PETSC_TRUE; 3262 3263 a->size = oldmat->size; 3264 a->rank = oldmat->rank; 3265 a->donotstash = oldmat->donotstash; 3266 a->roworiented = oldmat->roworiented; 3267 a->rowindices = 0; 3268 a->rowvalues = 0; 3269 a->getrowactive = PETSC_FALSE; 3270 3271 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3272 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3273 3274 if (oldmat->colmap) { 3275 #if defined (PETSC_USE_CTABLE) 3276 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3277 #else 3278 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3279 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3280 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3281 #endif 3282 } else a->colmap = 0; 3283 if (oldmat->garray) { 3284 PetscInt len; 3285 len = oldmat->B->cmap->n; 3286 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3287 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3288 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3289 } else a->garray = 0; 3290 3291 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3292 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3293 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3294 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3295 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3296 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3297 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3298 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3299 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3300 *newmat = mat; 3301 PetscFunctionReturn(0); 3302 } 3303 3304 3305 3306 #undef __FUNCT__ 3307 #define __FUNCT__ "MatLoad_MPIAIJ" 3308 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3309 { 3310 PetscScalar *vals,*svals; 3311 MPI_Comm comm = ((PetscObject)viewer)->comm; 3312 PetscErrorCode ierr; 3313 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3314 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3315 PetscInt 
header[4],*rowlengths = 0,M,N,m,*cols; 3316 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3317 PetscInt cend,cstart,n,*rowners,sizesset=1; 3318 int fd; 3319 PetscInt bs = 1; 3320 3321 PetscFunctionBegin; 3322 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3323 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3324 if (!rank) { 3325 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3326 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3327 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3328 } 3329 3330 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3331 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3332 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3333 3334 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3335 3336 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3337 M = header[1]; N = header[2]; 3338 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3339 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3340 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3341 3342 /* If global sizes are set, check if they are consistent with that given in the file */ 3343 if (sizesset) { 3344 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3345 } 3346 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3347 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3348 3349 /* determine ownership of all (block) rows */ 3350 if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3351 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3352 else m = newMat->rmap->n; /* Set by user */ 3353 3354 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3355 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3356 3357 /* First process needs enough room for process with most rows */ 3358 if (!rank) { 3359 mmax = rowners[1]; 3360 for (i=2; i<size; i++) { 3361 mmax = PetscMax(mmax,rowners[i]); 3362 } 3363 } else mmax = m; 3364 3365 rowners[0] = 0; 3366 for (i=2; i<=size; i++) { 3367 rowners[i] += rowners[i-1]; 3368 } 3369 rstart = rowners[rank]; 3370 rend = rowners[rank+1]; 3371 3372 /* distribute row lengths to all processors */ 3373 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3374 if (!rank) { 3375 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3376 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3377 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3378 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3379 for (j=0; j<m; j++) { 3380 procsnz[0] += ourlens[j]; 3381 } 3382 for (i=1; i<size; i++) { 3383 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3384 /* calculate the number of nonzeros on each processor */ 3385 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3386 procsnz[i] += 
rowlengths[j]; 3387 } 3388 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3389 } 3390 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3391 } else { 3392 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3393 } 3394 3395 if (!rank) { 3396 /* determine max buffer needed and allocate it */ 3397 maxnz = 0; 3398 for (i=0; i<size; i++) { 3399 maxnz = PetscMax(maxnz,procsnz[i]); 3400 } 3401 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3402 3403 /* read in my part of the matrix column indices */ 3404 nz = procsnz[0]; 3405 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3406 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3407 3408 /* read in every one elses and ship off */ 3409 for (i=1; i<size; i++) { 3410 nz = procsnz[i]; 3411 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3412 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3413 } 3414 ierr = PetscFree(cols);CHKERRQ(ierr); 3415 } else { 3416 /* determine buffer space needed for message */ 3417 nz = 0; 3418 for (i=0; i<m; i++) { 3419 nz += ourlens[i]; 3420 } 3421 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3422 3423 /* receive message of column indices*/ 3424 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3425 } 3426 3427 /* determine column ownership if matrix is not square */ 3428 if (N != M) { 3429 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3430 else n = newMat->cmap->n; 3431 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3432 cstart = cend - n; 3433 } else { 3434 cstart = rstart; 3435 cend = rend; 3436 n = cend - cstart; 3437 } 3438 3439 /* loop over local rows, determining number of off diagonal entries */ 3440 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3441 jj = 0; 3442 for (i=0; i<m; i++) { 3443 for (j=0; j<ourlens[i]; j++) { 3444 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3445 jj++; 3446 } 3447 } 3448 3449 for (i=0; i<m; i++) { 3450 ourlens[i] -= offlens[i]; 3451 } 3452 if (!sizesset) { 3453 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3454 } 3455 3456 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3457 3458 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3459 3460 for (i=0; i<m; i++) { 3461 ourlens[i] += offlens[i]; 3462 } 3463 3464 if (!rank) { 3465 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3466 3467 /* read in my part of the matrix numerical values */ 3468 nz = procsnz[0]; 3469 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3470 3471 /* insert into matrix */ 3472 jj = rstart; 3473 smycols = mycols; 3474 svals = vals; 3475 for (i=0; i<m; i++) { 3476 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3477 smycols += ourlens[i]; 3478 svals += ourlens[i]; 3479 jj++; 3480 } 3481 3482 /* read in other processors and ship out */ 3483 for (i=1; i<size; i++) { 3484 nz = procsnz[i]; 3485 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3486 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3487 } 3488 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3489 } else { 3490 /* receive numeric values */ 3491 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3492 3493 /* receive message of values*/ 3494 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3495 3496 /* 
insert into matrix */ 3497 jj = rstart; 3498 smycols = mycols; 3499 svals = vals; 3500 for (i=0; i<m; i++) { 3501 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3502 smycols += ourlens[i]; 3503 svals += ourlens[i]; 3504 jj++; 3505 } 3506 } 3507 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3508 ierr = PetscFree(vals);CHKERRQ(ierr); 3509 ierr = PetscFree(mycols);CHKERRQ(ierr); 3510 ierr = PetscFree(rowners);CHKERRQ(ierr); 3511 3512 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3513 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3514 3515 PetscFunctionReturn(0); 3516 } 3517 3518 #undef __FUNCT__ 3519 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3520 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3521 { 3522 PetscErrorCode ierr; 3523 IS iscol_local; 3524 PetscInt csize; 3525 3526 PetscFunctionBegin; 3527 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3528 if (call == MAT_REUSE_MATRIX) { 3529 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3530 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3531 } else { 3532 PetscInt cbs; 3533 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3534 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3535 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3536 } 3537 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3538 if (call == MAT_INITIAL_MATRIX) { 3539 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3540 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3541 } 3542 PetscFunctionReturn(0); 3543 } 3544 3545 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3546 #undef __FUNCT__ 3547 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3548 /* 3549 Not great since it makes two copies of the submatrix, first an SeqAIJ 3550 in local and then by concatenating the local matrices the end result. 3551 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3552 3553 Note: This requires a sequential iscol with all indices. 
3554 */ 3555 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3556 { 3557 PetscErrorCode ierr; 3558 PetscMPIInt rank,size; 3559 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3560 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3561 PetscBool allcolumns, colflag; 3562 Mat M,Mreuse; 3563 MatScalar *vwork,*aa; 3564 MPI_Comm comm = ((PetscObject)mat)->comm; 3565 Mat_SeqAIJ *aij; 3566 3567 3568 PetscFunctionBegin; 3569 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3570 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3571 3572 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3573 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3574 if (colflag && ncol == mat->cmap->N){ 3575 allcolumns = PETSC_TRUE; 3576 } else { 3577 allcolumns = PETSC_FALSE; 3578 } 3579 if (call == MAT_REUSE_MATRIX) { 3580 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3581 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3582 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3583 } else { 3584 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3585 } 3586 3587 /* 3588 m - number of local rows 3589 n - number of columns (same on all processors) 3590 rstart - first row in new global matrix generated 3591 */ 3592 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3593 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3594 if (call == MAT_INITIAL_MATRIX) { 3595 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3596 ii = aij->i; 3597 jj = aij->j; 3598 3599 /* 3600 Determine the number of non-zeros in the diagonal and off-diagonal 3601 portions of the matrix in order to do correct preallocation 3602 */ 3603 3604 /* first get start and end of "diagonal" columns */ 3605 if (csize == PETSC_DECIDE) { 3606 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3607 if (mglobal == n) { /* square matrix */ 3608 nlocal = m; 3609 } else { 3610 nlocal = n/size + ((n % size) > rank); 3611 } 3612 } else { 3613 nlocal = csize; 3614 } 3615 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3616 rstart = rend - nlocal; 3617 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3618 3619 /* next, compute all the lengths */ 3620 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3621 olens = dlens + m; 3622 for (i=0; i<m; i++) { 3623 jend = ii[i+1] - ii[i]; 3624 olen = 0; 3625 dlen = 0; 3626 for (j=0; j<jend; j++) { 3627 if (*jj < rstart || *jj >= rend) olen++; 3628 else dlen++; 3629 jj++; 3630 } 3631 olens[i] = olen; 3632 dlens[i] = dlen; 3633 } 3634 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3635 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3636 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3637 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3638 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3639 ierr = PetscFree(dlens);CHKERRQ(ierr); 3640 } else { 3641 PetscInt ml,nl; 3642 3643 M = *newmat; 3644 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3645 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3646 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3647 /* 3648 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly,
3649      rather than the slower MatSetValues().
3650   */
3651     M->was_assembled = PETSC_TRUE;
3652     M->assembled     = PETSC_FALSE;
3653   }
3654   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3655   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3656   ii = aij->i;
3657   jj = aij->j;
3658   aa = aij->a;
3659   for (i=0; i<m; i++) {
3660     row = rstart + i;
3661     nz = ii[i+1] - ii[i];
3662     cwork = jj; jj += nz;
3663     vwork = aa; aa += nz;
3664     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3665   }
3666
3667   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3668   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3669   *newmat = M;
3670
3671   /* save submatrix used in processor for next request */
3672   if (call == MAT_INITIAL_MATRIX) {
3673     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3674     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3675   }
3676
3677   PetscFunctionReturn(0);
3678 }
3679
3680 EXTERN_C_BEGIN
3681 #undef __FUNCT__
3682 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3683 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3684 {
3685   PetscInt       m,cstart, cend,j,nnz,i,d;
3686   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3687   const PetscInt *JJ;
3688   PetscScalar    *values;
3689   PetscErrorCode ierr;
3690
3691   PetscFunctionBegin;
3692   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3693
3694   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3695   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3696   m = B->rmap->n;
3697   cstart = B->cmap->rstart;
3698   cend = B->cmap->rend;
3699   rstart = B->rmap->rstart;
3700
3701   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3702
3703 #if defined(PETSC_USE_DEBUG)
3704   for (i=0; i<m; i++) {
3705     nnz = Ii[i+1]- Ii[i];
3706     JJ  = J + Ii[i];
3707     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3708     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3709     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3710   }
3711 #endif
3712
3713   for (i=0; i<m; i++) {
3714     nnz = Ii[i+1]- Ii[i];
3715     JJ  = J + Ii[i];
3716     nnz_max = PetscMax(nnz_max,nnz);
3717     d = 0;
3718     for (j=0; j<nnz; j++) {
3719       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3720     }
3721     d_nnz[i] = d;
3722     o_nnz[i] = nnz - d;
3723   }
3724   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3725   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3726
3727   if (v) values = (PetscScalar*)v;
3728   else {
3729     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3730     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3731   }
3732
3733   for (i=0; i<m; i++) {
3734     ii   = i + rstart;
3735     nnz  = Ii[i+1]- Ii[i];
3736     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3737 } 3738 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3739 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3740 3741 if (!v) { 3742 ierr = PetscFree(values);CHKERRQ(ierr); 3743 } 3744 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3745 PetscFunctionReturn(0); 3746 } 3747 EXTERN_C_END 3748 3749 #undef __FUNCT__ 3750 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3751 /*@ 3752 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3753 (the default parallel PETSc format). 3754 3755 Collective on MPI_Comm 3756 3757 Input Parameters: 3758 + B - the matrix 3759 . i - the indices into j for the start of each local row (starts with zero) 3760 . j - the column indices for each local row (starts with zero) 3761 - v - optional values in the matrix 3762 3763 Level: developer 3764 3765 Notes: 3766 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3767 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3768 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3769 3770 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3771 3772 The format which is used for the sparse matrix input, is equivalent to a 3773 row-major ordering.. i.e for the following matrix, the input data expected is 3774 as shown: 3775 3776 1 0 0 3777 2 0 3 P0 3778 ------- 3779 4 5 6 P1 3780 3781 Process0 [P0]: rows_owned=[0,1] 3782 i = {0,1,3} [size = nrow+1 = 2+1] 3783 j = {0,0,2} [size = nz = 6] 3784 v = {1,2,3} [size = nz = 6] 3785 3786 Process1 [P1]: rows_owned=[2] 3787 i = {0,3} [size = nrow+1 = 1+1] 3788 j = {0,1,2} [size = nz = 6] 3789 v = {4,5,6} [size = nz = 6] 3790 3791 .keywords: matrix, aij, compressed row, sparse, parallel 3792 3793 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3794 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3795 @*/ 3796 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3797 { 3798 PetscErrorCode ierr; 3799 3800 PetscFunctionBegin; 3801 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3802 PetscFunctionReturn(0); 3803 } 3804 3805 #undef __FUNCT__ 3806 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3807 /*@C 3808 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3809 (the default parallel PETSc format). For good matrix assembly performance 3810 the user should preallocate the matrix storage by setting the parameters 3811 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3812 performance can be increased by more than a factor of 50. 3813 3814 Collective on MPI_Comm 3815 3816 Input Parameters: 3817 + A - the matrix 3818 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3819 (same value is used for all local rows) 3820 . d_nnz - array containing the number of nonzeros in the various rows of the 3821 DIAGONAL portion of the local submatrix (possibly different for each row) 3822 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3823 The size of this array is equal to the number of local rows, i.e 'm'. 
3824 For matrices that will be factored, you must leave room for (and set) 3825 the diagonal entry even if it is zero. 3826 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3827 submatrix (same value is used for all local rows). 3828 - o_nnz - array containing the number of nonzeros in the various rows of the 3829 OFF-DIAGONAL portion of the local submatrix (possibly different for 3830 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3831 structure. The size of this array is equal to the number 3832 of local rows, i.e 'm'. 3833 3834 If the *_nnz parameter is given then the *_nz parameter is ignored 3835 3836 The AIJ format (also called the Yale sparse matrix format or 3837 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3838 storage. The stored row and column indices begin with zero. 3839 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3840 3841 The parallel matrix is partitioned such that the first m0 rows belong to 3842 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3843 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3844 3845 The DIAGONAL portion of the local submatrix of a processor can be defined 3846 as the submatrix which is obtained by extraction the part corresponding to 3847 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3848 first row that belongs to the processor, r2 is the last row belonging to 3849 the this processor, and c1-c2 is range of indices of the local part of a 3850 vector suitable for applying the matrix to. This is an mxn matrix. In the 3851 common case of a square matrix, the row and column ranges are the same and 3852 the DIAGONAL part is also square. The remaining portion of the local 3853 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3854 3855 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3856 3857 You can call MatGetInfo() to get information on how effective the preallocation was; 3858 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3859 You can also run with the option -info and look for messages with the string 3860 malloc in them to see if additional memory allocation was needed. 3861 3862 Example usage: 3863 3864 Consider the following 8x8 matrix with 34 non-zero values, that is 3865 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3866 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3867 as follows: 3868 3869 .vb 3870 1 2 0 | 0 3 0 | 0 4 3871 Proc0 0 5 6 | 7 0 0 | 8 0 3872 9 0 10 | 11 0 0 | 12 0 3873 ------------------------------------- 3874 13 0 14 | 15 16 17 | 0 0 3875 Proc1 0 18 0 | 19 20 21 | 0 0 3876 0 0 0 | 22 23 0 | 24 0 3877 ------------------------------------- 3878 Proc2 25 26 27 | 0 0 28 | 29 0 3879 30 0 0 | 31 32 33 | 0 34 3880 .ve 3881 3882 This can be represented as a collection of submatrices as: 3883 3884 .vb 3885 A B C 3886 D E F 3887 G H I 3888 .ve 3889 3890 Where the submatrices A,B,C are owned by proc0, D,E,F are 3891 owned by proc1, G,H,I are owned by proc2. 3892 3893 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3894 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3895 The 'M','N' parameters are 8,8, and have the same values on all procs. 3896 3897 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3898 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3899 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3900 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3901 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3902 matrix, ans [DF] as another SeqAIJ matrix. 3903 3904 When d_nz, o_nz parameters are specified, d_nz storage elements are 3905 allocated for every row of the local diagonal submatrix, and o_nz 3906 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3907 One way to choose d_nz and o_nz is to use the max nonzerors per local 3908 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3909 In this case, the values of d_nz,o_nz are: 3910 .vb 3911 proc0 : dnz = 2, o_nz = 2 3912 proc1 : dnz = 3, o_nz = 2 3913 proc2 : dnz = 1, o_nz = 4 3914 .ve 3915 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3916 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3917 for proc3. i.e we are using 12+15+10=37 storage locations to store 3918 34 values. 3919 3920 When d_nnz, o_nnz parameters are specified, the storage is specified 3921 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3922 In the above case the values for d_nnz,o_nnz are: 3923 .vb 3924 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3925 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3926 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3927 .ve 3928 Here the space allocated is sum of all the above values i.e 34, and 3929 hence pre-allocation is perfect. 3930 3931 Level: intermediate 3932 3933 .keywords: matrix, aij, compressed row, sparse, parallel 3934 3935 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3936 MPIAIJ, MatGetInfo() 3937 @*/ 3938 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3939 { 3940 PetscErrorCode ierr; 3941 3942 PetscFunctionBegin; 3943 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3944 PetscValidType(B,1); 3945 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3946 PetscFunctionReturn(0); 3947 } 3948 3949 #undef __FUNCT__ 3950 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3951 /*@ 3952 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3953 CSR format the local rows. 3954 3955 Collective on MPI_Comm 3956 3957 Input Parameters: 3958 + comm - MPI communicator 3959 . m - number of local rows (Cannot be PETSC_DECIDE) 3960 . n - This value should be the same as the local size used in creating the 3961 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3962 calculated if N is given) For square matrices n is almost always m. 3963 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3964 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3965 . i - row indices 3966 . j - column indices 3967 - a - matrix values 3968 3969 Output Parameter: 3970 . mat - the matrix 3971 3972 Level: intermediate 3973 3974 Notes: 3975 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3976 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3977 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
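
       As a rough calling sketch for the two-process example shown below (m, N, i, j, v
       denote each process's values from that example, and error checking is omitted):

.vb
       Mat A;
       MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DETERMINE,N,i,j,v,&A);
.ve
       Here m is 2 on process 0 and 1 on process 1 while N is 3; PETSc derives the global
       number of rows and the local column layout from the remaining arguments.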
3978 3979 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3980 3981 The format which is used for the sparse matrix input, is equivalent to a 3982 row-major ordering.. i.e for the following matrix, the input data expected is 3983 as shown: 3984 3985 1 0 0 3986 2 0 3 P0 3987 ------- 3988 4 5 6 P1 3989 3990 Process0 [P0]: rows_owned=[0,1] 3991 i = {0,1,3} [size = nrow+1 = 2+1] 3992 j = {0,0,2} [size = nz = 6] 3993 v = {1,2,3} [size = nz = 6] 3994 3995 Process1 [P1]: rows_owned=[2] 3996 i = {0,3} [size = nrow+1 = 1+1] 3997 j = {0,1,2} [size = nz = 6] 3998 v = {4,5,6} [size = nz = 6] 3999 4000 .keywords: matrix, aij, compressed row, sparse, parallel 4001 4002 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4003 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4004 @*/ 4005 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4006 { 4007 PetscErrorCode ierr; 4008 4009 PetscFunctionBegin; 4010 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4011 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4012 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4013 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4014 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 4015 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4016 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4017 PetscFunctionReturn(0); 4018 } 4019 4020 #undef __FUNCT__ 4021 #define __FUNCT__ "MatCreateAIJ" 4022 /*@C 4023 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4024 (the default parallel PETSc format). For good matrix assembly performance 4025 the user should preallocate the matrix storage by setting the parameters 4026 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4027 performance can be increased by more than a factor of 50. 4028 4029 Collective on MPI_Comm 4030 4031 Input Parameters: 4032 + comm - MPI communicator 4033 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4034 This value should be the same as the local size used in creating the 4035 y vector for the matrix-vector product y = Ax. 4036 . n - This value should be the same as the local size used in creating the 4037 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4038 calculated if N is given) For square matrices n is almost always m. 4039 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4040 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4041 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4042 (same value is used for all local rows) 4043 . d_nnz - array containing the number of nonzeros in the various rows of the 4044 DIAGONAL portion of the local submatrix (possibly different for each row) 4045 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4046 The size of this array is equal to the number of local rows, i.e 'm'. 4047 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4048 submatrix (same value is used for all local rows). 
4049 - o_nnz - array containing the number of nonzeros in the various rows of the 4050 OFF-DIAGONAL portion of the local submatrix (possibly different for 4051 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4052 structure. The size of this array is equal to the number 4053 of local rows, i.e 'm'. 4054 4055 Output Parameter: 4056 . A - the matrix 4057 4058 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4059 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4060 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4061 4062 Notes: 4063 If the *_nnz parameter is given then the *_nz parameter is ignored 4064 4065 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4066 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4067 storage requirements for this matrix. 4068 4069 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4070 processor than it must be used on all processors that share the object for 4071 that argument. 4072 4073 The user MUST specify either the local or global matrix dimensions 4074 (possibly both). 4075 4076 The parallel matrix is partitioned across processors such that the 4077 first m0 rows belong to process 0, the next m1 rows belong to 4078 process 1, the next m2 rows belong to process 2 etc.. where 4079 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4080 values corresponding to [m x N] submatrix. 4081 4082 The columns are logically partitioned with the n0 columns belonging 4083 to 0th partition, the next n1 columns belonging to the next 4084 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4085 4086 The DIAGONAL portion of the local submatrix on any given processor 4087 is the submatrix corresponding to the rows and columns m,n 4088 corresponding to the given processor. i.e diagonal matrix on 4089 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4090 etc. The remaining portion of the local submatrix [m x (N-n)] 4091 constitute the OFF-DIAGONAL portion. The example below better 4092 illustrates this concept. 4093 4094 For a square global matrix we define each processor's diagonal portion 4095 to be its local rows and the corresponding columns (a square submatrix); 4096 each processor's off-diagonal portion encompasses the remainder of the 4097 local matrix (a rectangular submatrix). 4098 4099 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4100 4101 When calling this routine with a single process communicator, a matrix of 4102 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4103 type of communicator, use the construction mechanism: 4104 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4105 4106 By default, this format uses inodes (identical nodes) when possible. 4107 We search for consecutive rows with the same nonzero structure, thereby 4108 reusing matrix information to achieve increased efficiency. 4109 4110 Options Database Keys: 4111 + -mat_no_inode - Do not use inodes 4112 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4113 - -mat_aij_oneindex - Internally use indexing starting at 1 4114 rather than 0. Note that when calling MatSetValues(), 4115 the user still MUST index entries starting at 0! 4116 4117 4118 Example usage: 4119 4120 Consider the following 8x8 matrix with 34 non-zero values, that is 4121 assembled across 3 processors. 
Let's assume that proc0 owns 3 rows,
4122    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4123    as follows:
4124
4125 .vb
4126             1  2  0  |  0  3  0  |  0  4
4127     Proc0   0  5  6  |  7  0  0  |  8  0
4128             9  0 10  | 11  0  0  | 12  0
4129     -------------------------------------
4130            13  0 14  | 15 16 17  |  0  0
4131     Proc1   0 18  0  | 19 20 21  |  0  0
4132             0  0  0  | 22 23  0  | 24  0
4133     -------------------------------------
4134     Proc2  25 26 27  |  0  0 28  | 29  0
4135            30  0  0  | 31 32 33  |  0 34
4136 .ve
4137
4138    This can be represented as a collection of submatrices as:
4139
4140 .vb
4141       A B C
4142       D E F
4143       G H I
4144 .ve
4145
4146    Where the submatrices A,B,C are owned by proc0, D,E,F are
4147    owned by proc1, and G,H,I are owned by proc2.
4148
4149    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4150    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4151    The 'M','N' parameters are 8,8, and have the same values on all procs.
4152
4153    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4154    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4155    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4156    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4157    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
4158    matrix, and [DF] as another SeqAIJ matrix.
4159
4160    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4161    allocated for every row of the local diagonal submatrix, and o_nz
4162    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4163    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4164    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4165    In this case, the values of d_nz,o_nz are:
4166 .vb
4167      proc0 : d_nz = 2, o_nz = 2
4168      proc1 : d_nz = 3, o_nz = 2
4169      proc2 : d_nz = 1, o_nz = 4
4170 .ve
4171    We are allocating m*(d_nz+o_nz) storage locations on every process. This
4172    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4173    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4174    34 values.
4175
4176    When the d_nnz, o_nnz parameters are specified, the storage is specified
4177    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4178    In the above case the values for d_nnz,o_nnz are:
4179 .vb
4180      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4181      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4182      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4183 .ve
4184    Here the space allocated is the sum of all of the above values, i.e. 34, and
4185    hence the preallocation is exact.
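
   A minimal sketch of the recommended MatCreate()/MatSetType()/MatXXXXSetPreallocation()
   sequence mentioned above, using the per-process d_nnz/o_nnz values from this example
   (error checking omitted; calling both preallocation routines is harmless, since the one
   that does not match the matrix type is ignored):
.vb
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
      MatSeqAIJSetPreallocation(A,0,d_nnz);
.ve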
4186 4187 Level: intermediate 4188 4189 .keywords: matrix, aij, compressed row, sparse, parallel 4190 4191 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4192 MPIAIJ, MatCreateMPIAIJWithArrays() 4193 @*/ 4194 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4195 { 4196 PetscErrorCode ierr; 4197 PetscMPIInt size; 4198 4199 PetscFunctionBegin; 4200 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4201 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4202 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4203 if (size > 1) { 4204 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4205 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4206 } else { 4207 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4208 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4209 } 4210 PetscFunctionReturn(0); 4211 } 4212 4213 #undef __FUNCT__ 4214 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4215 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4216 { 4217 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4218 4219 PetscFunctionBegin; 4220 *Ad = a->A; 4221 *Ao = a->B; 4222 *colmap = a->garray; 4223 PetscFunctionReturn(0); 4224 } 4225 4226 #undef __FUNCT__ 4227 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4228 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4229 { 4230 PetscErrorCode ierr; 4231 PetscInt i; 4232 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4233 4234 PetscFunctionBegin; 4235 if (coloring->ctype == IS_COLORING_GLOBAL) { 4236 ISColoringValue *allcolors,*colors; 4237 ISColoring ocoloring; 4238 4239 /* set coloring for diagonal portion */ 4240 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4241 4242 /* set coloring for off-diagonal portion */ 4243 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4244 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4245 for (i=0; i<a->B->cmap->n; i++) { 4246 colors[i] = allcolors[a->garray[i]]; 4247 } 4248 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4249 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4250 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4251 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4252 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4253 ISColoringValue *colors; 4254 PetscInt *larray; 4255 ISColoring ocoloring; 4256 4257 /* set coloring for diagonal portion */ 4258 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4259 for (i=0; i<a->A->cmap->n; i++) { 4260 larray[i] = i + A->cmap->rstart; 4261 } 4262 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4263 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4264 for (i=0; i<a->A->cmap->n; i++) { 4265 colors[i] = coloring->colors[larray[i]]; 4266 } 4267 ierr = PetscFree(larray);CHKERRQ(ierr); 4268 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4269 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4270 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4271 4272 /* set coloring for off-diagonal portion */ 4273 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4274 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4275 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4276 for (i=0; i<a->B->cmap->n; i++) { 4277 colors[i] = coloring->colors[larray[i]]; 4278 } 4279 ierr = PetscFree(larray);CHKERRQ(ierr); 4280 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4281 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4282 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4283 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4284 4285 PetscFunctionReturn(0); 4286 } 4287 4288 #if defined(PETSC_HAVE_ADIC) 4289 #undef __FUNCT__ 4290 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4291 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4292 { 4293 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4294 PetscErrorCode ierr; 4295 4296 PetscFunctionBegin; 4297 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4298 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4299 PetscFunctionReturn(0); 4300 } 4301 #endif 4302 4303 #undef __FUNCT__ 4304 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4305 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4306 { 4307 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4308 PetscErrorCode ierr; 4309 4310 PetscFunctionBegin; 4311 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4312 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4313 PetscFunctionReturn(0); 4314 } 4315 4316 #undef __FUNCT__ 4317 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4318 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4319 { 4320 PetscErrorCode ierr; 4321 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4322 PetscInt *indx; 4323 4324 PetscFunctionBegin; 4325 /* This routine will ONLY return MPIAIJ type matrix */ 4326 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4327 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4328 if (n == PETSC_DECIDE){ 4329 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4330 } 4331 /* Check sum(n) = N */ 4332 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4333 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4334 4335 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4336 rstart -= m; 4337 4338 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4339 for (i=0;i<m;i++) { 4340 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4341 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4342 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4343 } 4344 4345 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4346 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4347 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4348 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4349 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4350 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4351 PetscFunctionReturn(0); 4352 } 4353 4354 #undef __FUNCT__ 4355 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4356 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4357 { 4358 PetscErrorCode ierr; 4359 PetscInt 
m,N,i,rstart,nnz,Ii; 4360 PetscInt *indx; 4361 PetscScalar *values; 4362 4363 PetscFunctionBegin; 4364 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4365 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4366 for (i=0;i<m;i++) { 4367 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4368 Ii = i + rstart; 4369 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4370 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4371 } 4372 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4373 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4374 PetscFunctionReturn(0); 4375 } 4376 4377 #undef __FUNCT__ 4378 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4379 /*@ 4380 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4381 matrices from each processor 4382 4383 Collective on MPI_Comm 4384 4385 Input Parameters: 4386 + comm - the communicators the parallel matrix will live on 4387 . inmat - the input sequential matrices 4388 . n - number of local columns (or PETSC_DECIDE) 4389 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4390 4391 Output Parameter: 4392 . outmat - the parallel matrix generated 4393 4394 Level: advanced 4395 4396 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4397 4398 @*/ 4399 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4400 { 4401 PetscErrorCode ierr; 4402 4403 PetscFunctionBegin; 4404 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4405 if (scall == MAT_INITIAL_MATRIX){ 4406 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4407 } 4408 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4409 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4410 PetscFunctionReturn(0); 4411 } 4412 4413 #undef __FUNCT__ 4414 #define __FUNCT__ "MatFileSplit" 4415 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4416 { 4417 PetscErrorCode ierr; 4418 PetscMPIInt rank; 4419 PetscInt m,N,i,rstart,nnz; 4420 size_t len; 4421 const PetscInt *indx; 4422 PetscViewer out; 4423 char *name; 4424 Mat B; 4425 const PetscScalar *values; 4426 4427 PetscFunctionBegin; 4428 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4429 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4430 /* Should this be the type of the diagonal block of A? 
*/ 4431 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4432 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4433 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4434 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4435 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4436 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4437 for (i=0;i<m;i++) { 4438 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4439 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4440 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4441 } 4442 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4443 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4444 4445 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4446 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4447 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4448 sprintf(name,"%s.%d",outfile,rank); 4449 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4450 ierr = PetscFree(name); 4451 ierr = MatView(B,out);CHKERRQ(ierr); 4452 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4453 ierr = MatDestroy(&B);CHKERRQ(ierr); 4454 PetscFunctionReturn(0); 4455 } 4456 4457 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4458 #undef __FUNCT__ 4459 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4460 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4461 { 4462 PetscErrorCode ierr; 4463 Mat_Merge_SeqsToMPI *merge; 4464 PetscContainer container; 4465 4466 PetscFunctionBegin; 4467 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4468 if (container) { 4469 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4470 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4471 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4472 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4473 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4474 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4475 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4476 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4477 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4478 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4479 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4480 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4481 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4482 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4483 ierr = PetscFree(merge);CHKERRQ(ierr); 4484 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4485 } 4486 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4487 PetscFunctionReturn(0); 4488 } 4489 4490 #include <../src/mat/utils/freespace.h> 4491 #include <petscbt.h> 4492 4493 #undef __FUNCT__ 4494 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4495 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4496 { 4497 PetscErrorCode ierr; 4498 MPI_Comm comm=((PetscObject)mpimat)->comm; 4499 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4500 PetscMPIInt size,rank,taga,*len_s; 4501 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4502 PetscInt proc,m; 4503 PetscInt **buf_ri,**buf_rj; 4504 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4505 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4506 MPI_Request *s_waits,*r_waits; 4507 MPI_Status *status; 4508 MatScalar *aa=a->a; 4509 MatScalar **abuf_r,*ba_i; 4510 Mat_Merge_SeqsToMPI *merge; 4511 PetscContainer container; 4512 4513 PetscFunctionBegin; 4514 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4515 4516 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4517 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4518 4519 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4520 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4521 4522 bi = merge->bi; 4523 bj = merge->bj; 4524 buf_ri = merge->buf_ri; 4525 buf_rj = merge->buf_rj; 4526 4527 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4528 owners = merge->rowmap->range; 4529 len_s = merge->len_s; 4530 4531 /* send and recv matrix values */ 4532 /*-----------------------------*/ 4533 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4534 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4535 4536 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4537 for (proc=0,k=0; proc<size; proc++){ 4538 if (!len_s[proc]) continue; 4539 i = owners[proc]; 4540 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4541 k++; 4542 } 4543 4544 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4545 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4546 ierr = PetscFree(status);CHKERRQ(ierr); 4547 4548 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4549 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4550 4551 /* insert mat values of mpimat */ 4552 /*----------------------------*/ 4553 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4554 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4555 4556 for (k=0; k<merge->nrecv; k++){ 4557 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4558 nrows = *(buf_ri_k[k]); 4559 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4560 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4561 } 4562 4563 /* set values of ba */ 4564 m = merge->rowmap->n; 4565 for (i=0; i<m; i++) { 4566 arow = owners[rank] + i; 4567 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4568 bnzi = bi[i+1] - bi[i]; 4569 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4570 4571 /* add local non-zero vals of this proc's seqmat into ba */ 4572 anzi = ai[arow+1] - ai[arow]; 4573 aj = a->j + ai[arow]; 4574 aa = a->a + ai[arow]; 4575 nextaj = 0; 4576 for (j=0; nextaj<anzi; j++){ 4577 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4578 ba_i[j] += aa[nextaj++]; 4579 } 4580 } 4581 4582 /* add received vals into ba */ 4583 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4584 /* i-th row */ 4585 if (i == *nextrow[k]) { 4586 anzi = *(nextai[k]+1) - *nextai[k]; 4587 aj = buf_rj[k] + *(nextai[k]); 4588 aa = abuf_r[k] + *(nextai[k]); 4589 nextaj = 0; 4590 for (j=0; nextaj<anzi; j++){ 4591 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4592 ba_i[j] += aa[nextaj++]; 4593 } 4594 } 4595 nextrow[k]++; nextai[k]++; 4596 } 4597 } 4598 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4599 } 4600 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4601 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4602 4603 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4604 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4605 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4606 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4607 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4608 PetscFunctionReturn(0); 4609 } 4610 4611 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4612 4613 #undef __FUNCT__ 4614 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4615 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4616 { 4617 PetscErrorCode ierr; 4618 Mat B_mpi; 4619 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4620 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4621 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4622 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4623 PetscInt len,proc,*dnz,*onz,bs,cbs; 4624 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4625 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4626 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4627 MPI_Status *status; 4628 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4629 PetscBT lnkbt; 4630 Mat_Merge_SeqsToMPI *merge; 4631 PetscContainer container; 4632 4633 PetscFunctionBegin; 4634 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4635 4636 /* make sure it is a PETSc comm */ 4637 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4638 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4639 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4640 4641 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4642 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4643 4644 /* determine row ownership */ 4645 /*---------------------------------------------------------*/ 4646 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4647 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4648 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4649 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4650 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4651 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4652 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4653 4654 m = merge->rowmap->n; 4655 M = merge->rowmap->N; 4656 owners = merge->rowmap->range; 4657 4658 /* determine the number of messages to send, their lengths */ 4659 /*---------------------------------------------------------*/ 4660 len_s = merge->len_s; 4661 4662 len = 0; /* length of buf_si[] */ 4663 merge->nsend = 0; 4664 for (proc=0; proc<size; proc++){ 4665 len_si[proc] = 0; 4666 if (proc == rank){ 4667 len_s[proc] = 0; 4668 } else { 4669 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4670 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4671 } 4672 if (len_s[proc]) { 4673 merge->nsend++; 4674 nrows = 0; 4675 for (i=owners[proc]; i<owners[proc+1]; i++){ 4676 if (ai[i+1] > ai[i]) nrows++; 4677 } 4678 len_si[proc] = 2*(nrows+1); 4679 len += len_si[proc]; 4680 } 4681 } 4682 4683 /* determine the number and length of messages to receive for ij-structure */ 4684 /*-------------------------------------------------------------------------*/ 4685 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4686 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4687 4688 /* post the Irecv of j-structure */ 4689 /*-------------------------------*/ 4690 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4691 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4692 4693 /* post the Isend of j-structure */ 4694 /*--------------------------------*/ 4695 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4696 4697 for (proc=0, k=0; proc<size; proc++){ 4698 if (!len_s[proc]) continue; 4699 i = owners[proc]; 4700 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4701 k++; 4702 } 4703 4704 /* receives and sends of j-structure are complete */ 4705 /*------------------------------------------------*/ 4706 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4707 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4708 4709 /* send and recv i-structure */ 4710 /*---------------------------*/ 4711 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4712 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4713 4714 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4715 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4716 for (proc=0,k=0; proc<size; proc++){ 4717 if (!len_s[proc]) continue; 4718 /* form outgoing message for i-structure: 4719 buf_si[0]: nrows to be sent 4720 [1:nrows]: row index (global) 4721 [nrows+1:2*nrows+1]: i-structure index 4722 */ 4723 /*-------------------------------------------*/ 4724 nrows = len_si[proc]/2 - 1; 4725 buf_si_i = buf_si + nrows+1; 4726 buf_si[0] = nrows; 4727 buf_si_i[0] = 0; 4728 nrows = 0; 4729 for (i=owners[proc]; i<owners[proc+1]; i++){ 4730 anzi = ai[i+1] - ai[i]; 4731 if (anzi) { 4732 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4733 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4734 nrows++; 4735 } 4736 } 4737 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4738 k++; 4739 buf_si += len_si[proc]; 4740 } 4741 4742 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4743 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4744 4745 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4746 for (i=0; i<merge->nrecv; i++){ 4747 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4748 } 4749 4750 ierr = PetscFree(len_si);CHKERRQ(ierr); 4751 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4752 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4753 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4754 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4755 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4756 ierr = PetscFree(status);CHKERRQ(ierr); 4757 4758 /* compute a local seq matrix in each processor */ 4759 /*----------------------------------------------*/ 4760 /* allocate bi array and free space for accumulating nonzero column info */ 4761 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4762 bi[0] = 0; 4763 4764 /* create and initialize a linked list */ 4765 nlnk = N+1; 4766 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4767 4768 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4769 len = 0; 4770 len = ai[owners[rank+1]] - ai[owners[rank]]; 4771 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4772 current_space = free_space; 4773 4774 /* determine 
symbolic info for each local row */ 4775 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4776 4777 for (k=0; k<merge->nrecv; k++){ 4778 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4779 nrows = *buf_ri_k[k]; 4780 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4781 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */ 4782 } 4783 4784 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4785 len = 0; 4786 for (i=0;i<m;i++) { 4787 bnzi = 0; 4788 /* add local non-zero cols of this proc's seqmat into lnk */ 4789 arow = owners[rank] + i; 4790 anzi = ai[arow+1] - ai[arow]; 4791 aj = a->j + ai[arow]; 4792 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4793 bnzi += nlnk; 4794 /* add received col data into lnk */ 4795 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4796 if (i == *nextrow[k]) { /* i-th row */ 4797 anzi = *(nextai[k]+1) - *nextai[k]; 4798 aj = buf_rj[k] + *nextai[k]; 4799 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4800 bnzi += nlnk; 4801 nextrow[k]++; nextai[k]++; 4802 } 4803 } 4804 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4805 4806 /* if free space is not available, make more free space */ 4807 if (current_space->local_remaining<bnzi) { 4808 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4809 nspacedouble++; 4810 } 4811 /* copy data into free space, then initialize lnk */ 4812 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4813 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4814 4815 current_space->array += bnzi; 4816 current_space->local_used += bnzi; 4817 current_space->local_remaining -= bnzi; 4818 4819 bi[i+1] = bi[i] + bnzi; 4820 } 4821 4822 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4823 4824 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4825 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4826 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4827 4828 /* create symbolic parallel matrix B_mpi */ 4829 /*---------------------------------------*/ 4830 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4831 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4832 if (n==PETSC_DECIDE) { 4833 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4834 } else { 4835 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4836 } 4837 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr); 4838 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4839 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4840 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4841 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4842 4843 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4844 B_mpi->assembled = PETSC_FALSE; 4845 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4846 merge->bi = bi; 4847 merge->bj = bj; 4848 merge->buf_ri = buf_ri; 4849 merge->buf_rj = buf_rj; 4850 merge->coi = PETSC_NULL; 4851 merge->coj = PETSC_NULL; 4852 merge->owners_co = PETSC_NULL; 4853 4854 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4855 4856 /* attach the supporting struct to B_mpi for reuse */ 4857 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4858 ierr =
PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4859 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4860 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4861 *mpimat = B_mpi; 4862 4863 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4864 PetscFunctionReturn(0); 4865 } 4866 4867 #undef __FUNCT__ 4868 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4869 /*@C 4870 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential 4871 matrices from each processor 4872 4873 Collective on MPI_Comm 4874 4875 Input Parameters: 4876 + comm - the communicator the parallel matrix will live on 4877 . seqmat - the input sequential matrix 4878 . m - number of local rows (or PETSC_DECIDE) 4879 . n - number of local columns (or PETSC_DECIDE) 4880 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4881 4882 Output Parameter: 4883 . mpimat - the parallel matrix generated 4884 4885 Level: advanced 4886 4887 Notes: 4888 The dimensions of the sequential matrix on each process MUST be the same. 4889 The communication pattern computed during the symbolic phase is attached to mpimat in a container 4890 named "MatMergeSeqsToMPI" (accessible with PetscObjectQuery()) and is reused when scall is MAT_REUSE_MATRIX; the input seqmat itself is not stored or destroyed by this routine. A usage sketch appears in a comment at the end of this file. 4891 @*/ 4892 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4893 { 4894 PetscErrorCode ierr; 4895 PetscMPIInt size; 4896 4897 PetscFunctionBegin; 4898 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4899 if (size == 1){ 4900 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4901 if (scall == MAT_INITIAL_MATRIX){ 4902 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4903 } else { 4904 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4905 } 4906 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4907 PetscFunctionReturn(0); 4908 } 4909 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4910 if (scall == MAT_INITIAL_MATRIX){ 4911 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4912 } 4913 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4914 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4915 PetscFunctionReturn(0); 4916 } 4917 4918 #undef __FUNCT__ 4919 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4920 /*@ 4921 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all of its local rows and putting them into a sequential matrix with 4922 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4923 with MatGetSize() 4924 4925 Not Collective 4926 4927 Input Parameters: 4928 + A - the matrix 4929 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4930 4931 Output Parameter: 4932 .
A_loc - the local sequential matrix generated 4933 4934 Level: developer 4935 4936 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4937 4938 @*/ 4939 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4940 { 4941 PetscErrorCode ierr; 4942 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4943 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4944 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4945 MatScalar *aa=a->a,*ba=b->a,*cam; 4946 PetscScalar *ca; 4947 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4948 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4949 PetscBool match; 4950 4951 PetscFunctionBegin; 4952 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4953 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4954 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4955 if (scall == MAT_INITIAL_MATRIX){ 4956 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4957 ci[0] = 0; 4958 for (i=0; i<am; i++){ 4959 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4960 } 4961 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4962 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4963 k = 0; 4964 for (i=0; i<am; i++) { 4965 ncols_o = bi[i+1] - bi[i]; 4966 ncols_d = ai[i+1] - ai[i]; 4967 /* off-diagonal portion of A: global columns before the diagonal block */ 4968 for (jo=0; jo<ncols_o; jo++) { 4969 col = cmap[*bj]; 4970 if (col >= cstart) break; 4971 cj[k] = col; bj++; 4972 ca[k++] = *ba++; 4973 } 4974 /* diagonal portion of A */ 4975 for (j=0; j<ncols_d; j++) { 4976 cj[k] = cstart + *aj++; 4977 ca[k++] = *aa++; 4978 } 4979 /* off-diagonal portion of A: global columns after the diagonal block */ 4980 for (j=jo; j<ncols_o; j++) { 4981 cj[k] = cmap[*bj++]; 4982 ca[k++] = *ba++; 4983 } 4984 } 4985 /* put together the new matrix */ 4986 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4987 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4988 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4989 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4990 mat->free_a = PETSC_TRUE; 4991 mat->free_ij = PETSC_TRUE; 4992 mat->nonew = 0; 4993 } else if (scall == MAT_REUSE_MATRIX){ 4994 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4995 ci = mat->i; cj = mat->j; cam = mat->a; 4996 for (i=0; i<am; i++) { 4997 /* off-diagonal portion of A: global columns before the diagonal block */ 4998 ncols_o = bi[i+1] - bi[i]; 4999 for (jo=0; jo<ncols_o; jo++) { 5000 col = cmap[*bj]; 5001 if (col >= cstart) break; 5002 *cam++ = *ba++; bj++; 5003 } 5004 /* diagonal portion of A */ 5005 ncols_d = ai[i+1] - ai[i]; 5006 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5007 /* off-diagonal portion of A: global columns after the diagonal block */ 5008 for (j=jo; j<ncols_o; j++) { 5009 *cam++ = *ba++; bj++; 5010 } 5011 } 5012 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5013 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5014 PetscFunctionReturn(0); 5015 } 5016 5017 #undef __FUNCT__ 5018 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5019 /*@C 5020 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5021 5022 Not Collective 5023 5024 Input Parameters: 5025 + A - the matrix 5026 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5027 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5028 5029 Output Parameter: 5030 .
A_loc - the local sequential matrix generated 5031 5032 Level: developer 5033 5034 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5035 5036 @*/ 5037 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5038 { 5039 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5040 PetscErrorCode ierr; 5041 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5042 IS isrowa,iscola; 5043 Mat *aloc; 5044 PetscBool match; 5045 5046 PetscFunctionBegin; 5047 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5048 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5049 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5050 if (!row){ 5051 start = A->rmap->rstart; end = A->rmap->rend; 5052 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5053 } else { 5054 isrowa = *row; 5055 } 5056 if (!col){ 5057 start = A->cmap->rstart; 5058 cmap = a->garray; 5059 nzA = a->A->cmap->n; 5060 nzB = a->B->cmap->n; 5061 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5062 ncols = 0; 5063 for (i=0; i<nzB; i++) { 5064 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5065 else break; 5066 } 5067 imark = i; 5068 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5069 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5070 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5071 } else { 5072 iscola = *col; 5073 } 5074 if (scall != MAT_INITIAL_MATRIX){ 5075 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5076 aloc[0] = *A_loc; 5077 } 5078 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5079 *A_loc = aloc[0]; 5080 ierr = PetscFree(aloc);CHKERRQ(ierr); 5081 if (!row){ 5082 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5083 } 5084 if (!col){ 5085 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5086 } 5087 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5088 PetscFunctionReturn(0); 5089 } 5090 5091 #undef __FUNCT__ 5092 #define __FUNCT__ "MatGetBrowsOfAcols" 5093 /*@C 5094 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 5095 5096 Collective on Mat 5097 5098 Input Parameters: 5099 + A,B - the matrices in mpiaij format 5100 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5101 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5102 5103 Output Parameter: 5104 + rowb, colb - index sets of rows and columns of B to extract 5105 - B_seq - the sequential matrix generated 5106 5107 Level: developer 5108 5109 @*/ 5110 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5111 { 5112 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5113 PetscErrorCode ierr; 5114 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5115 IS isrowb,iscolb; 5116 Mat *bseq=PETSC_NULL; 5117 5118 PetscFunctionBegin; 5119 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5120 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5121 } 5122 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5123 5124 if (scall == MAT_INITIAL_MATRIX){ 5125 start = A->cmap->rstart; 5126 cmap = a->garray; 5127 nzA = a->A->cmap->n; 5128 nzB = a->B->cmap->n; 5129 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5130 ncols = 0; 5131 for (i=0; i<nzB; i++) { /* row < local row index */ 5132 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5133 else break; 5134 } 5135 imark = i; 5136 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5137 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5138 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5139 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5140 } else { 5141 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5142 isrowb = *rowb; iscolb = *colb; 5143 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5144 bseq[0] = *B_seq; 5145 } 5146 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5147 *B_seq = bseq[0]; 5148 ierr = PetscFree(bseq);CHKERRQ(ierr); 5149 if (!rowb){ 5150 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5151 } else { 5152 *rowb = isrowb; 5153 } 5154 if (!colb){ 5155 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5156 } else { 5157 *colb = iscolb; 5158 } 5159 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5160 PetscFunctionReturn(0); 5161 } 5162 5163 #undef __FUNCT__ 5164 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5165 /* 5166 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5167 of the OFF-DIAGONAL portion of local A 5168 5169 Collective on Mat 5170 5171 Input Parameters: 5172 + A,B - the matrices in mpiaij format 5173 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5174 5175 Output Parameter: 5176 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5177 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5178 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5179 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5180 5181 Level: developer 5182 5183 */ 5184 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5185 { 5186 VecScatter_MPI_General *gen_to,*gen_from; 5187 PetscErrorCode ierr; 5188 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5189 Mat_SeqAIJ *b_oth; 5190 VecScatter ctx=a->Mvctx; 5191 MPI_Comm comm=((PetscObject)ctx)->comm; 5192 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5193 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5194 PetscScalar *rvalues,*svalues; 5195 MatScalar *b_otha,*bufa,*bufA; 5196 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5197 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5198 MPI_Status *sstatus,rstatus; 5199 PetscMPIInt jj; 5200 PetscInt *cols,sbs,rbs; 5201 PetscScalar *vals; 5202 5203 PetscFunctionBegin; 5204 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5205 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5206 } 5207 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5208 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5209 5210 gen_to = (VecScatter_MPI_General*)ctx->todata; 5211 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5212 rvalues = gen_from->values; /* holds the length of receiving row */ 5213 svalues = gen_to->values; /* holds the length of sending row */ 5214 nrecvs = gen_from->n; 5215 nsends = gen_to->n; 5216 5217 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5218 srow = gen_to->indices; /* local row index to be sent */ 5219 sstarts = gen_to->starts; 5220 sprocs = gen_to->procs; 5221 sstatus = gen_to->sstatus; 5222 sbs = gen_to->bs; 5223 rstarts = gen_from->starts; 5224 rprocs = gen_from->procs; 5225 rbs = gen_from->bs; 5226 5227 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5228 if (scall == MAT_INITIAL_MATRIX){ 5229 /* i-array */ 5230 /*---------*/ 5231 /* post receives */ 5232 for (i=0; i<nrecvs; i++){ 5233 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5234 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5235 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5236 } 5237 5238 /* pack the outgoing message */ 5239 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5240 sstartsj[0] = 0; rstartsj[0] = 0; 5241 len = 0; /* total length of j or a array to be sent */ 5242 k = 0; 5243 for (i=0; i<nsends; i++){ 5244 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5245 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5246 for (j=0; j<nrows; j++) { 5247 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5248 for (l=0; l<sbs; l++){ 5249 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5250 rowlen[j*sbs+l] = ncols; 5251 len += ncols; 5252 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5253 } 5254 k++; 5255 } 5256 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5257 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5258 } 5259 /* 
recvs and sends of i-array are completed */ 5260 i = nrecvs; 5261 while (i--) { 5262 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5263 } 5264 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5265 5266 /* allocate buffers for sending j and a arrays */ 5267 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5268 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5269 5270 /* create i-array of B_oth */ 5271 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5272 b_othi[0] = 0; 5273 len = 0; /* total length of j or a array to be received */ 5274 k = 0; 5275 for (i=0; i<nrecvs; i++){ 5276 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5277 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5278 for (j=0; j<nrows; j++) { 5279 b_othi[k+1] = b_othi[k] + rowlen[j]; 5280 len += rowlen[j]; k++; 5281 } 5282 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5283 } 5284 5285 /* allocate space for j and a arrays of B_oth */ 5286 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5287 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5288 5289 /* j-array */ 5290 /*---------*/ 5291 /* post receives of j-array */ 5292 for (i=0; i<nrecvs; i++){ 5293 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5294 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5295 } 5296 5297 /* pack the outgoing message j-array */ 5298 k = 0; 5299 for (i=0; i<nsends; i++){ 5300 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5301 bufJ = bufj+sstartsj[i]; 5302 for (j=0; j<nrows; j++) { 5303 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5304 for (ll=0; ll<sbs; ll++){ 5305 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5306 for (l=0; l<ncols; l++){ 5307 *bufJ++ = cols[l]; 5308 } 5309 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5310 } 5311 } 5312 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5313 } 5314 5315 /* recvs and sends of j-array are completed */ 5316 i = nrecvs; 5317 while (i--) { 5318 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5319 } 5320 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5321 } else if (scall == MAT_REUSE_MATRIX){ 5322 sstartsj = *startsj_s; 5323 rstartsj = *startsj_r; 5324 bufa = *bufa_ptr; 5325 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5326 b_otha = b_oth->a; 5327 } else { 5328 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5329 } 5330 5331 /* a-array */ 5332 /*---------*/ 5333 /* post receives of a-array */ 5334 for (i=0; i<nrecvs; i++){ 5335 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5336 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5337 } 5338 5339 /* pack the outgoing message a-array */ 5340 k = 0; 5341 for (i=0; i<nsends; i++){ 5342 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5343 bufA = bufa+sstartsj[i]; 5344 for (j=0; j<nrows; j++) { 5345 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5346 for (ll=0; ll<sbs; ll++){ 5347 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5348 for (l=0; l<ncols; l++){ 5349 *bufA++ = vals[l]; 5350 } 5351 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5352 } 5353 } 5354 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5355 } 5356 /* recvs and sends of a-array are completed */ 5357 i = nrecvs; 5358 while (i--) { 5359 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5360 } 5361 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5362 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5363 5364 if (scall == MAT_INITIAL_MATRIX){ 5365 /* put together the new matrix */ 5366 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5367 5368 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5369 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5370 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5371 b_oth->free_a = PETSC_TRUE; 5372 b_oth->free_ij = PETSC_TRUE; 5373 b_oth->nonew = 0; 5374 5375 ierr = PetscFree(bufj);CHKERRQ(ierr); 5376 if (!startsj_s || !bufa_ptr){ 5377 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5378 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5379 } else { 5380 *startsj_s = sstartsj; 5381 *startsj_r = rstartsj; 5382 *bufa_ptr = bufa; 5383 } 5384 } 5385 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5386 PetscFunctionReturn(0); 5387 } 5388 5389 #undef __FUNCT__ 5390 #define __FUNCT__ "MatGetCommunicationStructs" 5391 /*@C 5392 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5393 5394 Not Collective 5395 5396 Input Parameters: 5397 . A - The matrix in mpiaij format 5398 5399 Output Parameter: 5400 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5401 . 
colmap - A map from global column index to local index into lvec 5402 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5403 5404 Level: developer 5405 5406 @*/ 5407 #if defined (PETSC_USE_CTABLE) 5408 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5409 #else 5410 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5411 #endif 5412 { 5413 Mat_MPIAIJ *a; 5414 5415 PetscFunctionBegin; 5416 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5417 PetscValidPointer(lvec, 2); 5418 PetscValidPointer(colmap, 3); 5419 PetscValidPointer(multScatter, 4); 5420 a = (Mat_MPIAIJ *) A->data; 5421 if (lvec) *lvec = a->lvec; 5422 if (colmap) *colmap = a->colmap; 5423 if (multScatter) *multScatter = a->Mvctx; 5424 PetscFunctionReturn(0); 5425 } 5426 5427 EXTERN_C_BEGIN 5428 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5429 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5430 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5431 EXTERN_C_END 5432 5433 #undef __FUNCT__ 5434 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5435 /* 5436 Computes (B'*A')' since computing B*A directly is untenable 5437 5438 n p p 5439 ( ) ( ) ( ) 5440 m ( A ) * n ( B ) = m ( C ) 5441 ( ) ( ) ( ) 5442 5443 */ 5444 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5445 { 5446 PetscErrorCode ierr; 5447 Mat At,Bt,Ct; 5448 5449 PetscFunctionBegin; 5450 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5451 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5452 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5453 ierr = MatDestroy(&At);CHKERRQ(ierr); 5454 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5455 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5456 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5457 PetscFunctionReturn(0); 5458 } 5459 5460 #undef __FUNCT__ 5461 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5462 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5463 { 5464 PetscErrorCode ierr; 5465 PetscInt m=A->rmap->n,n=B->cmap->n; 5466 Mat Cmat; 5467 5468 PetscFunctionBegin; 5469 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5470 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5471 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5472 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5473 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5474 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5475 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5476 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5477 *C = Cmat; 5478 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5479 PetscFunctionReturn(0); 5480 } 5481 5482 /* ----------------------------------------------------------------*/ 5483 #undef __FUNCT__ 5484 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5485 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5486 { 5487 PetscErrorCode ierr; 5488 5489 PetscFunctionBegin; 5490 if (scall == MAT_INITIAL_MATRIX){ 5491 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5492 } 5493 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
5494 PetscFunctionReturn(0); 5495 } 5496 5497 EXTERN_C_BEGIN 5498 #if defined(PETSC_HAVE_MUMPS) 5499 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5500 #endif 5501 #if defined(PETSC_HAVE_PASTIX) 5502 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5503 #endif 5504 #if defined(PETSC_HAVE_SUPERLU_DIST) 5505 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5506 #endif 5507 #if defined(PETSC_HAVE_SPOOLES) 5508 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5509 #endif 5510 EXTERN_C_END 5511 5512 /*MC 5513 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5514 5515 Options Database Keys: 5516 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5517 5518 Level: beginner 5519 5520 .seealso: MatCreateAIJ() 5521 M*/ 5522 5523 EXTERN_C_BEGIN 5524 #undef __FUNCT__ 5525 #define __FUNCT__ "MatCreate_MPIAIJ" 5526 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5527 { 5528 Mat_MPIAIJ *b; 5529 PetscErrorCode ierr; 5530 PetscMPIInt size; 5531 5532 PetscFunctionBegin; 5533 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5534 5535 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5536 B->data = (void*)b; 5537 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5538 B->assembled = PETSC_FALSE; 5539 5540 B->insertmode = NOT_SET_VALUES; 5541 b->size = size; 5542 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5543 5544 /* build cache for off array entries formed */ 5545 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5546 b->donotstash = PETSC_FALSE; 5547 b->colmap = 0; 5548 b->garray = 0; 5549 b->roworiented = PETSC_TRUE; 5550 5551 /* stuff used for matrix vector multiply */ 5552 b->lvec = PETSC_NULL; 5553 b->Mvctx = PETSC_NULL; 5554 5555 /* stuff for MatGetRow() */ 5556 b->rowindices = 0; 5557 b->rowvalues = 0; 5558 b->getrowactive = PETSC_FALSE; 5559 5560 #if defined(PETSC_HAVE_SPOOLES) 5561 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5562 "MatGetFactor_mpiaij_spooles", 5563 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5564 #endif 5565 #if defined(PETSC_HAVE_MUMPS) 5566 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5567 "MatGetFactor_aij_mumps", 5568 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5569 #endif 5570 #if defined(PETSC_HAVE_PASTIX) 5571 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5572 "MatGetFactor_mpiaij_pastix", 5573 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5574 #endif 5575 #if defined(PETSC_HAVE_SUPERLU_DIST) 5576 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5577 "MatGetFactor_mpiaij_superlu_dist", 5578 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5579 #endif 5580 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5581 "MatStoreValues_MPIAIJ", 5582 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5583 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5584 "MatRetrieveValues_MPIAIJ", 5585 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5586 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5587 "MatGetDiagonalBlock_MPIAIJ", 5588 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5589 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5590 "MatIsTranspose_MPIAIJ", 5591 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5592 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5593 "MatMPIAIJSetPreallocation_MPIAIJ", 5594 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5595 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5596 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5597 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5598 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5599 "MatDiagonalScaleLocal_MPIAIJ", 5600 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5601 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5602 "MatConvert_MPIAIJ_MPIAIJPERM", 5603 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5604 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5605 "MatConvert_MPIAIJ_MPIAIJCRL", 5606 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5607 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5608 "MatConvert_MPIAIJ_MPISBAIJ", 5609 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5610 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5611 "MatMatMult_MPIDense_MPIAIJ", 5612 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5613 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5614 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5615 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5616 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5617 "MatMatMultNumeric_MPIDense_MPIAIJ", 5618 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5619 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5620 PetscFunctionReturn(0); 5621 } 5622 EXTERN_C_END 5623 5624 #undef __FUNCT__ 5625 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5626 /*@ 5627 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5628 and "off-diagonal" part of the matrix in CSR format. 5629 5630 Collective on MPI_Comm 5631 5632 Input Parameters: 5633 + comm - MPI communicator 5634 . m - number of local rows (Cannot be PETSC_DECIDE) 5635 . n - This value should be the same as the local size used in creating the 5636 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5637 calculated if N is given) For square matrices n is almost always m. 5638 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5639 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5640 . i - row indices for "diagonal" portion of matrix 5641 . j - column indices 5642 . a - matrix values 5643 . oi - row indices for "off-diagonal" portion of matrix 5644 . oj - column indices 5645 - oa - matrix values 5646 5647 Output Parameter: 5648 . mat - the matrix 5649 5650 Level: advanced 5651 5652 Notes: 5653 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5654 must free the arrays once the matrix has been destroyed and not before. 5655 5656 The i and j indices are 0 based 5657 5658 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5659 5660 This sets local rows and cannot be used to set off-processor values. 5661 5662 You cannot later use MatSetValues() to change values in this matrix. 
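
   Example usage (an illustrative sketch added for documentation, not taken from a PETSc example; the
   arrays i, j, a, oi, oj, oa are assumed to have been filled by the caller following the conventions
   described above, with nlocalrows local rows and nlocalcols local columns on each process):

      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,nlocalrows,nlocalcols,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ...                                   use A, for example in MatMult() or a linear solve
      ierr = MatDestroy(&A);CHKERRQ(ierr);
      ...                                   only now may the caller free i, j, a, oi, oj and oa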
5663 5664 .keywords: matrix, aij, compressed row, sparse, parallel 5665 5666 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5667 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5668 @*/ 5669 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5670 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5671 { 5672 PetscErrorCode ierr; 5673 Mat_MPIAIJ *maij; 5674 5675 PetscFunctionBegin; 5676 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5677 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5678 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5679 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5680 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5681 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5682 maij = (Mat_MPIAIJ*) (*mat)->data; 5683 maij->donotstash = PETSC_TRUE; 5684 (*mat)->preallocated = PETSC_TRUE; 5685 5686 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5687 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5688 5689 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5690 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5691 5692 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5693 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5694 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5695 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5696 5697 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5698 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5699 PetscFunctionReturn(0); 5700 } 5701 5702 /* 5703 Special version for direct calls from Fortran 5704 */ 5705 #include <petsc-private/fortranimpl.h> 5706 5707 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5708 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5709 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5710 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5711 #endif 5712 5713 /* Change these macros so can be used in void function */ 5714 #undef CHKERRQ 5715 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5716 #undef SETERRQ2 5717 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5718 #undef SETERRQ3 5719 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5720 #undef SETERRQ 5721 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5722 5723 EXTERN_C_BEGIN 5724 #undef __FUNCT__ 5725 #define __FUNCT__ "matsetvaluesmpiaij_" 5726 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5727 { 5728 Mat mat = *mmat; 5729 PetscInt m = *mm, n = *mn; 5730 InsertMode addv = *maddv; 5731 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5732 PetscScalar value; 5733 PetscErrorCode ierr; 5734 5735 MatCheckPreallocated(mat,1); 5736 if (mat->insertmode == NOT_SET_VALUES) { 5737 mat->insertmode = addv; 5738 } 5739 #if defined(PETSC_USE_DEBUG) 5740 else if (mat->insertmode != addv) { 5741 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5742 } 5743 #endif 5744 { 5745 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5746 PetscInt cstart = 
mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5747 PetscBool roworiented = aij->roworiented; 5748 5749 /* Some Variables required in the macro */ 5750 Mat A = aij->A; 5751 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5752 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5753 MatScalar *aa = a->a; 5754 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5755 Mat B = aij->B; 5756 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5757 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5758 MatScalar *ba = b->a; 5759 5760 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5761 PetscInt nonew = a->nonew; 5762 MatScalar *ap1,*ap2; 5763 5764 PetscFunctionBegin; 5765 for (i=0; i<m; i++) { 5766 if (im[i] < 0) continue; 5767 #if defined(PETSC_USE_DEBUG) 5768 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5769 #endif 5770 if (im[i] >= rstart && im[i] < rend) { 5771 row = im[i] - rstart; 5772 lastcol1 = -1; 5773 rp1 = aj + ai[row]; 5774 ap1 = aa + ai[row]; 5775 rmax1 = aimax[row]; 5776 nrow1 = ailen[row]; 5777 low1 = 0; 5778 high1 = nrow1; 5779 lastcol2 = -1; 5780 rp2 = bj + bi[row]; 5781 ap2 = ba + bi[row]; 5782 rmax2 = bimax[row]; 5783 nrow2 = bilen[row]; 5784 low2 = 0; 5785 high2 = nrow2; 5786 5787 for (j=0; j<n; j++) { 5788 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5789 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5790 if (in[j] >= cstart && in[j] < cend){ 5791 col = in[j] - cstart; 5792 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5793 } else if (in[j] < 0) continue; 5794 #if defined(PETSC_USE_DEBUG) 5795 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5796 #endif 5797 else { 5798 if (mat->was_assembled) { 5799 if (!aij->colmap) { 5800 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5801 } 5802 #if defined (PETSC_USE_CTABLE) 5803 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5804 col--; 5805 #else 5806 col = aij->colmap[in[j]] - 1; 5807 #endif 5808 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5809 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5810 col = in[j]; 5811 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5812 B = aij->B; 5813 b = (Mat_SeqAIJ*)B->data; 5814 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5815 rp2 = bj + bi[row]; 5816 ap2 = ba + bi[row]; 5817 rmax2 = bimax[row]; 5818 nrow2 = bilen[row]; 5819 low2 = 0; 5820 high2 = nrow2; 5821 bm = aij->B->rmap->n; 5822 ba = b->a; 5823 } 5824 } else col = in[j]; 5825 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5826 } 5827 } 5828 } else { 5829 if (!aij->donotstash) { 5830 if (roworiented) { 5831 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5832 } else { 5833 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5834 } 5835 } 5836 } 5837 }} 5838 PetscFunctionReturnVoid(); 5839 } 5840 EXTERN_C_END 5841 5842
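
/*
   Example usage of MatCreateMPIAIJSumSeqAIJ() (an illustrative sketch added for documentation, not
   taken from an actual PETSc example; "localmat" stands for an already assembled SeqAIJ matrix with
   the same dimensions on every process of PETSC_COMM_WORLD):

     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ...   change the numerical values (but not the nonzero pattern) of localmat ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&C);CHKERRQ(ierr);

   With MAT_INITIAL_MATRIX both MatCreateMPIAIJSumSeqAIJSymbolic() and MatCreateMPIAIJSumSeqAIJNumeric()
   are executed; with MAT_REUSE_MATRIX only the numeric phase is rerun, reusing the communication pattern
   stored in the "MatMergeSeqsToMPI" container attached to C.
*/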