1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j <nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for(j=0; j<na;j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 PetscInt i,n,*garray = aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr); 187 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 188 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 189 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 190 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 191 rowners[0] = 0; 192 for (i=2; i<=size; i++) { 193 rowners[i] += rowners[i-1]; 194 } 195 rstart = rowners[rank]; 196 rend = rowners[rank+1]; 197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 198 if (!rank) { 199 gmata = (Mat_SeqAIJ*) gmat->data; 200 /* send row lengths to all processors */ 201 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 202 for (i=1; i<size; i++) { 203 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 204 } 205 /* determine number diagonal and off-diagonal counts */ 206 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 207 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 208 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 209 jj = 0; 210 for (i=0; i<m; i++) { 211 for (j=0; j<dlens[i]; j++) { 212 if (gmata->j[jj] < rstart) ld[i]++; 213 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 214 jj++; 215 } 216 } 217 /* send column indices to other processes */ 218 for (i=1; i<size; i++) { 219 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 220 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 222 } 223 224 /* send numerical values to other processes */ 225 for (i=1; i<size; i++) { 226 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 227 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 228 } 229 gmataa = gmata->a; 230 gmataj = gmata->j; 231 232 } else { 233 /* receive row lengths */ 234 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 235 /* receive column indices */ 236 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 237 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 238 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 239 /* determine number diagonal and off-diagonal counts */ 240 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 241 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 242 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 243 jj = 0; 244 for (i=0; i<m; i++) { 245 for (j=0; j<dlens[i]; j++) { 246 if (gmataj[jj] < rstart) ld[i]++; 247 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 248 jj++; 249 } 250 } 251 /* receive numerical values */ 252 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 253 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 254 } 255 /* set preallocation */ 256 for (i=0; i<m; i++) { 257 dlens[i] -= olens[i]; 258 } 259 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 260 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 261 262 for (i=0; i<m; i++) { 263 dlens[i] += olens[i]; 264 } 265 cnt = 0; 266 for (i=0; i<m; i++) { 267 row = rstart + i; 268 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 269 cnt += dlens[i]; 270 } 271 if (rank) { 272 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 273 } 274 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 275 ierr = PetscFree(rowners);CHKERRQ(ierr); 276 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 277 *inmat = mat; 278 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 279 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 280 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 281 mat = *inmat; 282 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 283 if (!rank) { 284 /* send numerical values to other processes */ 285 gmata = (Mat_SeqAIJ*) gmat->data; 286 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 287 gmataa = gmata->a; 288 for (i=1; i<size; i++) { 289 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 290 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 291 } 292 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 293 } else { 294 /* receive numerical values from process 0*/ 295 nz = Ad->nz + Ao->nz; 296 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 297 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 298 } 299 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 300 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 301 ad = Ad->a; 302 ao = Ao->a; 303 if (mat->rmap->n) { 304 i = 0; 305 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 306 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 307 } 308 for (i=1; i<mat->rmap->n; i++) { 309 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 310 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 311 } 312 i--; 313 if (mat->rmap->n) { 314 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 315 } 316 if (rank) { 317 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 318 } 319 } 320 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 322 CHKMEMQ; 323 PetscFunctionReturn(0); 324 } 325 326 /* 327 Local utility routine that creates a mapping from the global column 328 number to the local number in the off-diagonal part of the local 329 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 330 a slightly higher hash table cost; without it it is not scalable (each processor 331 has an order N integer array but is fast to acess. 332 */ 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 336 { 337 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 338 PetscErrorCode ierr; 339 PetscInt n = aij->B->cmap->n,i; 340 341 PetscFunctionBegin; 342 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 343 #if defined (PETSC_USE_CTABLE) 344 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 345 for (i=0; i<n; i++){ 346 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 347 } 348 #else 349 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 350 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 351 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 352 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 353 #endif 354 PetscFunctionReturn(0); 355 } 356 357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 358 { \ 359 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 360 lastcol1 = col;\ 361 while (high1-low1 > 5) { \ 362 t = (low1+high1)/2; \ 363 if (rp1[t] > col) high1 = t; \ 364 else low1 = t; \ 365 } \ 366 for (_i=low1; _i<high1; _i++) { \ 367 if (rp1[_i] > col) break; \ 368 if (rp1[_i] == col) { \ 369 if (addv == ADD_VALUES) ap1[_i] += value; \ 370 else ap1[_i] = value; \ 371 goto a_noinsert; \ 372 } \ 373 } \ 374 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 375 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 376 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 377 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 378 N = nrow1++ - 1; a->nz++; high1++; \ 379 /* shift up all the later entries in this row */ \ 380 for (ii=N; ii>=_i; ii--) { \ 381 rp1[ii+1] = rp1[ii]; \ 382 ap1[ii+1] = ap1[ii]; \ 383 } \ 384 rp1[_i] = col; \ 385 ap1[_i] = value; \ 386 a_noinsert: ; \ 387 ailen[row] = nrow1; \ 388 } 389 390 391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 392 { \ 393 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 394 lastcol2 = col;\ 395 while (high2-low2 > 5) { \ 396 t = (low2+high2)/2; \ 397 if (rp2[t] > col) high2 = t; \ 398 else low2 = t; \ 399 } \ 400 for (_i=low2; _i<high2; _i++) { \ 401 if (rp2[_i] > col) break; \ 402 if (rp2[_i] == col) { \ 403 if (addv == ADD_VALUES) ap2[_i] += value; \ 404 else ap2[_i] = value; \ 405 goto b_noinsert; \ 406 } \ 407 } \ 408 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 409 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 410 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 411 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 412 N = nrow2++ - 1; b->nz++; high2++; \ 413 /* shift up all the later entries in this row */ \ 414 for (ii=N; ii>=_i; ii--) { \ 415 rp2[ii+1] = rp2[ii]; \ 416 ap2[ii+1] = ap2[ii]; \ 417 } \ 418 rp2[_i] = col; \ 419 ap2[_i] = value; \ 420 b_noinsert: ; \ 421 bilen[row] = nrow2; \ 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 427 { 428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 430 PetscErrorCode ierr; 431 PetscInt l,*garray = mat->garray,diag; 432 433 PetscFunctionBegin; 434 /* code only works for square matrices A */ 435 436 /* find size of row to the left of the diagonal part */ 437 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 438 row = row - diag; 439 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 440 if (garray[b->j[b->i[row]+l]] > diag) break; 441 } 442 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 443 444 /* diagonal part */ 445 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 446 447 /* right of diagonal part */ 448 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValues_MPIAIJ" 454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscScalar value; 458 PetscErrorCode ierr; 459 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 460 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 461 PetscBool roworiented = aij->roworiented; 462 463 /* Some Variables required in the macro */ 464 Mat A = aij->A; 465 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 466 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 467 MatScalar *aa = a->a; 468 PetscBool ignorezeroentries = a->ignorezeroentries; 469 Mat B = aij->B; 470 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 471 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 472 MatScalar *ba = b->a; 473 474 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 475 PetscInt nonew; 476 MatScalar *ap1,*ap2; 477 478 PetscFunctionBegin; 479 if (v) PetscValidScalarPointer(v,6); 480 for (i=0; i<m; i++) { 481 if (im[i] < 0) continue; 482 #if defined(PETSC_USE_DEBUG) 483 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 484 #endif 485 if (im[i] >= rstart && im[i] < rend) { 486 row = im[i] - rstart; 487 lastcol1 = -1; 488 rp1 = aj + ai[row]; 489 ap1 = aa + ai[row]; 490 rmax1 = aimax[row]; 491 nrow1 = ailen[row]; 492 low1 = 0; 493 high1 = nrow1; 494 lastcol2 = -1; 495 rp2 = bj + bi[row]; 496 ap2 = ba + bi[row]; 497 rmax2 = bimax[row]; 498 nrow2 = bilen[row]; 499 low2 = 0; 500 high2 = nrow2; 501 502 for (j=0; j<n; j++) { 503 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 504 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 505 if (in[j] >= cstart && in[j] < cend){ 506 col = in[j] - cstart; 507 nonew = a->nonew; 508 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 509 } else if (in[j] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 512 #endif 513 else { 514 if (mat->was_assembled) { 515 if (!aij->colmap) { 516 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 517 } 518 #if defined (PETSC_USE_CTABLE) 519 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col--; 521 #else 522 col = aij->colmap[in[j]] - 1; 523 #endif 524 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 528 B = aij->B; 529 b = (Mat_SeqAIJ*)B->data; 530 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 531 rp2 = bj + bi[row]; 532 ap2 = ba + bi[row]; 533 rmax2 = bimax[row]; 534 nrow2 = bilen[row]; 535 low2 = 0; 536 high2 = nrow2; 537 bm = aij->B->rmap->n; 538 ba = b->a; 539 } 540 } else col = in[j]; 541 nonew = b->nonew; 542 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 543 } 544 } 545 } else { 546 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 547 if (!aij->donotstash) { 548 mat->assembled = PETSC_FALSE; 549 if (roworiented) { 550 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatGetValues_MPIAIJ" 562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 563 { 564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 565 PetscErrorCode ierr; 566 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 567 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 568 569 PetscFunctionBegin; 570 for (i=0; i<m; i++) { 571 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 572 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 573 if (idxm[i] >= rstart && idxm[i] < rend) { 574 row = idxm[i] - rstart; 575 for (j=0; j<n; j++) { 576 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 577 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 578 if (idxn[j] >= cstart && idxn[j] < cend){ 579 col = idxn[j] - cstart; 580 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } else { 582 if (!aij->colmap) { 583 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 584 } 585 #if defined (PETSC_USE_CTABLE) 586 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 587 col --; 588 #else 589 col = aij->colmap[idxn[j]] - 1; 590 #endif 591 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 592 else { 593 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 594 } 595 } 596 } 597 } else { 598 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 599 } 600 } 601 PetscFunctionReturn(0); 602 } 603 604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 609 { 610 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 611 PetscErrorCode ierr; 612 PetscInt nstash,reallocs; 613 InsertMode addv; 614 615 PetscFunctionBegin; 616 if (aij->donotstash || mat->nooffprocentries) { 617 PetscFunctionReturn(0); 618 } 619 620 /* make sure all processors are either in INSERTMODE or ADDMODE */ 621 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 622 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 623 mat->insertmode = addv; /* in case this processor had no cache */ 624 625 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 626 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 627 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 634 { 635 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 636 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 637 PetscErrorCode ierr; 638 PetscMPIInt n; 639 PetscInt i,j,rstart,ncols,flg; 640 PetscInt *row,*col; 641 PetscBool other_disassembled; 642 PetscScalar *val; 643 InsertMode addv = mat->insertmode; 644 645 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 646 PetscFunctionBegin; 647 if (!aij->donotstash && !mat->nooffprocentries) { 648 while (1) { 649 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 650 if (!flg) break; 651 652 for (i=0; i<n;) { 653 /* Now identify the consecutive vals belonging to the same row */ 654 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 655 if (j < n) ncols = j-i; 656 else ncols = n-i; 657 /* Now assemble all these values with a single function call */ 658 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 659 i = j; 660 } 661 } 662 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 663 } 664 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 665 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 666 667 /* determine if any processor has disassembled, if so we must 668 also disassemble ourselfs, in order that we may reassemble. */ 669 /* 670 if nonzero structure of submatrix B cannot change then we know that 671 no processor disassembled thus we can skip this stuff 672 */ 673 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 674 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 675 if (mat->was_assembled && !other_disassembled) { 676 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 677 } 678 } 679 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 680 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 681 } 682 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 683 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 684 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 685 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 686 687 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 688 aij->rowvalues = 0; 689 690 /* used by MatAXPY() */ 691 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 692 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 693 694 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 695 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 702 { 703 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 704 PetscErrorCode ierr; 705 706 PetscFunctionBegin; 707 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 708 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatZeroRows_MPIAIJ" 714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 715 { 716 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 717 PetscErrorCode ierr; 718 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 719 PetscInt i,*owners = A->rmap->range; 720 PetscInt *nprocs,j,idx,nsends,row; 721 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 722 PetscInt *rvalues,count,base,slen,*source; 723 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 724 MPI_Comm comm = ((PetscObject)A)->comm; 725 MPI_Request *send_waits,*recv_waits; 726 MPI_Status recv_status,*send_status; 727 const PetscScalar *xx; 728 PetscScalar *bb; 729 #if defined(PETSC_DEBUG) 730 PetscBool found = PETSC_FALSE; 731 #endif 732 733 PetscFunctionBegin; 734 /* first count number of contributors to each processor */ 735 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 736 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 737 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 738 j = 0; 739 for (i=0; i<N; i++) { 740 if (lastidx > (idx = rows[i])) j = 0; 741 lastidx = idx; 742 for (; j<size; j++) { 743 if (idx >= owners[j] && idx < owners[j+1]) { 744 nprocs[2*j]++; 745 nprocs[2*j+1] = 1; 746 owner[i] = j; 747 #if defined(PETSC_DEBUG) 748 found = PETSC_TRUE; 749 #endif 750 break; 751 } 752 } 753 #if defined(PETSC_DEBUG) 754 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 755 found = PETSC_FALSE; 756 #endif 757 } 758 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 759 760 if (A->nooffproczerorows) { 761 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 762 nrecvs = nsends; 763 nmax = N; 764 } else { 765 /* inform other processors of number of messages and max length*/ 766 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 767 } 768 769 /* post receives: */ 770 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 771 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 772 for (i=0; i<nrecvs; i++) { 773 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 774 } 775 776 /* do sends: 777 1) starts[i] gives the starting index in svalues for stuff going to 778 the ith processor 779 */ 780 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 781 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 782 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 783 starts[0] = 0; 784 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 785 for (i=0; i<N; i++) { 786 svalues[starts[owner[i]]++] = rows[i]; 787 } 788 789 starts[0] = 0; 790 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 791 count = 0; 792 for (i=0; i<size; i++) { 793 if (nprocs[2*i+1]) { 794 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 795 } 796 } 797 ierr = PetscFree(starts);CHKERRQ(ierr); 798 799 base = owners[rank]; 800 801 /* wait on receives */ 802 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 803 count = nrecvs; slen = 0; 804 while (count) { 805 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 806 /* unpack receives into our local space */ 807 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 808 source[imdex] = recv_status.MPI_SOURCE; 809 lens[imdex] = n; 810 slen += n; 811 count--; 812 } 813 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 814 815 /* move the data into the send scatter */ 816 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 817 count = 0; 818 for (i=0; i<nrecvs; i++) { 819 values = rvalues + i*nmax; 820 for (j=0; j<lens[i]; j++) { 821 lrows[count++] = values[j] - base; 822 } 823 } 824 ierr = PetscFree(rvalues);CHKERRQ(ierr); 825 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 826 ierr = PetscFree(owner);CHKERRQ(ierr); 827 ierr = PetscFree(nprocs);CHKERRQ(ierr); 828 829 /* fix right hand side if needed */ 830 if (x && b) { 831 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 833 for (i=0; i<slen; i++) { 834 bb[lrows[i]] = diag*xx[lrows[i]]; 835 } 836 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 837 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 838 } 839 /* 840 Zero the required rows. If the "diagonal block" of the matrix 841 is square and the user wishes to set the diagonal we use separate 842 code so that MatSetValues() is not called for each diagonal allocating 843 new memory, thus calling lots of mallocs and slowing things down. 844 845 */ 846 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 847 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 848 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 849 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 850 } else if (diag != 0.0) { 851 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 852 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 853 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 855 } 856 for (i = 0; i < slen; i++) { 857 row = lrows[i] + rstart; 858 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 859 } 860 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 861 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 862 } else { 863 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 864 } 865 ierr = PetscFree(lrows);CHKERRQ(ierr); 866 867 /* wait on sends */ 868 if (nsends) { 869 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 870 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 871 ierr = PetscFree(send_status);CHKERRQ(ierr); 872 } 873 ierr = PetscFree(send_waits);CHKERRQ(ierr); 874 ierr = PetscFree(svalues);CHKERRQ(ierr); 875 PetscFunctionReturn(0); 876 } 877 878 #undef __FUNCT__ 879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 881 { 882 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 883 PetscErrorCode ierr; 884 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 885 PetscInt i,*owners = A->rmap->range; 886 PetscInt *nprocs,j,idx,nsends; 887 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 888 PetscInt *rvalues,count,base,slen,*source; 889 PetscInt *lens,*lrows,*values,m; 890 MPI_Comm comm = ((PetscObject)A)->comm; 891 MPI_Request *send_waits,*recv_waits; 892 MPI_Status recv_status,*send_status; 893 const PetscScalar *xx; 894 PetscScalar *bb,*mask; 895 Vec xmask,lmask; 896 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 897 const PetscInt *aj, *ii,*ridx; 898 PetscScalar *aa; 899 #if defined(PETSC_DEBUG) 900 PetscBool found = PETSC_FALSE; 901 #endif 902 903 PetscFunctionBegin; 904 /* first count number of contributors to each processor */ 905 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 906 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 907 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 908 j = 0; 909 for (i=0; i<N; i++) { 910 if (lastidx > (idx = rows[i])) j = 0; 911 lastidx = idx; 912 for (; j<size; j++) { 913 if (idx >= owners[j] && idx < owners[j+1]) { 914 nprocs[2*j]++; 915 nprocs[2*j+1] = 1; 916 owner[i] = j; 917 #if defined(PETSC_DEBUG) 918 found = PETSC_TRUE; 919 #endif 920 break; 921 } 922 } 923 #if defined(PETSC_DEBUG) 924 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 925 found = PETSC_FALSE; 926 #endif 927 } 928 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 929 930 /* inform other processors of number of messages and max length*/ 931 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 932 933 /* post receives: */ 934 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 935 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 936 for (i=0; i<nrecvs; i++) { 937 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 938 } 939 940 /* do sends: 941 1) starts[i] gives the starting index in svalues for stuff going to 942 the ith processor 943 */ 944 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 945 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 946 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 947 starts[0] = 0; 948 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 949 for (i=0; i<N; i++) { 950 svalues[starts[owner[i]]++] = rows[i]; 951 } 952 953 starts[0] = 0; 954 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 955 count = 0; 956 for (i=0; i<size; i++) { 957 if (nprocs[2*i+1]) { 958 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 959 } 960 } 961 ierr = PetscFree(starts);CHKERRQ(ierr); 962 963 base = owners[rank]; 964 965 /* wait on receives */ 966 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 967 count = nrecvs; slen = 0; 968 while (count) { 969 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 970 /* unpack receives into our local space */ 971 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 972 source[imdex] = recv_status.MPI_SOURCE; 973 lens[imdex] = n; 974 slen += n; 975 count--; 976 } 977 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 978 979 /* move the data into the send scatter */ 980 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 981 count = 0; 982 for (i=0; i<nrecvs; i++) { 983 values = rvalues + i*nmax; 984 for (j=0; j<lens[i]; j++) { 985 lrows[count++] = values[j] - base; 986 } 987 } 988 ierr = PetscFree(rvalues);CHKERRQ(ierr); 989 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 990 ierr = PetscFree(owner);CHKERRQ(ierr); 991 ierr = PetscFree(nprocs);CHKERRQ(ierr); 992 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 993 994 /* zero diagonal part of matrix */ 995 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 996 997 /* handle off diagonal part of matrix */ 998 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 999 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1000 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1001 for (i=0; i<slen; i++) { 1002 bb[lrows[i]] = 1; 1003 } 1004 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1005 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1008 if (x) { 1009 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1012 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1013 } 1014 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1015 1016 /* remove zeroed rows of off diagonal matrix */ 1017 ii = aij->i; 1018 for (i=0; i<slen; i++) { 1019 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1020 } 1021 1022 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1023 if (aij->compressedrow.use){ 1024 m = aij->compressedrow.nrows; 1025 ii = aij->compressedrow.i; 1026 ridx = aij->compressedrow.rindex; 1027 for (i=0; i<m; i++){ 1028 n = ii[i+1] - ii[i]; 1029 aj = aij->j + ii[i]; 1030 aa = aij->a + ii[i]; 1031 1032 for (j=0; j<n; j++) { 1033 if (PetscAbsScalar(mask[*aj])) { 1034 if (b) bb[*ridx] -= *aa*xx[*aj]; 1035 *aa = 0.0; 1036 } 1037 aa++; 1038 aj++; 1039 } 1040 ridx++; 1041 } 1042 } else { /* do not use compressed row format */ 1043 m = l->B->rmap->n; 1044 for (i=0; i<m; i++) { 1045 n = ii[i+1] - ii[i]; 1046 aj = aij->j + ii[i]; 1047 aa = aij->a + ii[i]; 1048 for (j=0; j<n; j++) { 1049 if (PetscAbsScalar(mask[*aj])) { 1050 if (b) bb[i] -= *aa*xx[*aj]; 1051 *aa = 0.0; 1052 } 1053 aa++; 1054 aj++; 1055 } 1056 } 1057 } 1058 if (x) { 1059 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1060 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1061 } 1062 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1063 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1064 ierr = PetscFree(lrows);CHKERRQ(ierr); 1065 1066 /* wait on sends */ 1067 if (nsends) { 1068 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1069 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1070 ierr = PetscFree(send_status);CHKERRQ(ierr); 1071 } 1072 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1073 ierr = PetscFree(svalues);CHKERRQ(ierr); 1074 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatMult_MPIAIJ" 1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1081 { 1082 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1083 PetscErrorCode ierr; 1084 PetscInt nt; 1085 1086 PetscFunctionBegin; 1087 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1088 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1089 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1091 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1092 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1099 { 1100 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1118 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 PetscBool merged; 1130 1131 PetscFunctionBegin; 1132 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1133 /* do nondiagonal part */ 1134 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1135 if (!merged) { 1136 /* send it on its way */ 1137 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 /* do local part */ 1139 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1140 /* receive remote parts: note this assumes the values are not actually */ 1141 /* added in yy until the next line, */ 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 } else { 1144 /* do local part */ 1145 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1146 /* send it on its way */ 1147 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148 /* values actually were received in the Begin() but we need to call this nop */ 1149 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1150 } 1151 PetscFunctionReturn(0); 1152 } 1153 1154 EXTERN_C_BEGIN 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1157 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1158 { 1159 MPI_Comm comm; 1160 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1161 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1162 IS Me,Notme; 1163 PetscErrorCode ierr; 1164 PetscInt M,N,first,last,*notme,i; 1165 PetscMPIInt size; 1166 1167 PetscFunctionBegin; 1168 1169 /* Easy test: symmetric diagonal block */ 1170 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1171 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1172 if (!*f) PetscFunctionReturn(0); 1173 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1175 if (size == 1) PetscFunctionReturn(0); 1176 1177 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1178 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1179 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1180 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1181 for (i=0; i<first; i++) notme[i] = i; 1182 for (i=last; i<M; i++) notme[i-last+first] = i; 1183 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1184 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1185 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1186 Aoff = Aoffs[0]; 1187 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1188 Boff = Boffs[0]; 1189 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1190 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1191 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1192 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1193 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1194 ierr = PetscFree(notme);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 EXTERN_C_END 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1202 { 1203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 /* do nondiagonal part */ 1208 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1209 /* send it on its way */ 1210 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1211 /* do local part */ 1212 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1213 /* receive remote parts */ 1214 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 /* 1219 This only works correctly for square matrices where the subblock A->A is the 1220 diagonal block 1221 */ 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1225 { 1226 PetscErrorCode ierr; 1227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1228 1229 PetscFunctionBegin; 1230 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1231 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1232 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatScale_MPIAIJ" 1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1239 { 1240 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1245 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatDestroy_MPIAIJ" 1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1252 { 1253 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 #if defined(PETSC_USE_LOG) 1258 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1259 #endif 1260 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1261 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1263 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1264 #if defined (PETSC_USE_CTABLE) 1265 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1266 #else 1267 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1268 #endif 1269 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1270 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1271 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1272 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1273 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1274 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1275 1276 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1291 { 1292 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1293 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1294 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1295 PetscErrorCode ierr; 1296 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1297 int fd; 1298 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1299 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1300 PetscScalar *column_values; 1301 PetscInt message_count,flowcontrolcount; 1302 FILE *file; 1303 1304 PetscFunctionBegin; 1305 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1306 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1307 nz = A->nz + B->nz; 1308 if (!rank) { 1309 header[0] = MAT_FILE_CLASSID; 1310 header[1] = mat->rmap->N; 1311 header[2] = mat->cmap->N; 1312 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1313 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1314 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1315 /* get largest number of rows any processor has */ 1316 rlen = mat->rmap->n; 1317 range = mat->rmap->range; 1318 for (i=1; i<size; i++) { 1319 rlen = PetscMax(rlen,range[i+1] - range[i]); 1320 } 1321 } else { 1322 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1323 rlen = mat->rmap->n; 1324 } 1325 1326 /* load up the local row counts */ 1327 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1328 for (i=0; i<mat->rmap->n; i++) { 1329 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1330 } 1331 1332 /* store the row lengths to the file */ 1333 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1334 if (!rank) { 1335 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1336 for (i=1; i<size; i++) { 1337 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1338 rlen = range[i+1] - range[i]; 1339 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1340 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1341 } 1342 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1343 } else { 1344 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1345 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1346 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1347 } 1348 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1349 1350 /* load up the local column indices */ 1351 nzmax = nz; /* )th processor needs space a largest processor needs */ 1352 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1353 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1354 cnt = 0; 1355 for (i=0; i<mat->rmap->n; i++) { 1356 for (j=B->i[i]; j<B->i[i+1]; j++) { 1357 if ( (col = garray[B->j[j]]) > cstart) break; 1358 column_indices[cnt++] = col; 1359 } 1360 for (k=A->i[i]; k<A->i[i+1]; k++) { 1361 column_indices[cnt++] = A->j[k] + cstart; 1362 } 1363 for (; j<B->i[i+1]; j++) { 1364 column_indices[cnt++] = garray[B->j[j]]; 1365 } 1366 } 1367 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1368 1369 /* store the column indices to the file */ 1370 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1371 if (!rank) { 1372 MPI_Status status; 1373 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1374 for (i=1; i<size; i++) { 1375 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1376 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1377 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1378 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1379 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1380 } 1381 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1382 } else { 1383 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1384 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1385 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1386 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1387 } 1388 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1389 1390 /* load up the local column values */ 1391 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1392 cnt = 0; 1393 for (i=0; i<mat->rmap->n; i++) { 1394 for (j=B->i[i]; j<B->i[i+1]; j++) { 1395 if ( garray[B->j[j]] > cstart) break; 1396 column_values[cnt++] = B->a[j]; 1397 } 1398 for (k=A->i[i]; k<A->i[i+1]; k++) { 1399 column_values[cnt++] = A->a[k]; 1400 } 1401 for (; j<B->i[i+1]; j++) { 1402 column_values[cnt++] = B->a[j]; 1403 } 1404 } 1405 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1406 1407 /* store the column values to the file */ 1408 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1409 if (!rank) { 1410 MPI_Status status; 1411 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1412 for (i=1; i<size; i++) { 1413 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1414 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1415 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1416 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1417 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1418 } 1419 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1420 } else { 1421 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1422 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1423 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1424 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1425 } 1426 ierr = PetscFree(column_values);CHKERRQ(ierr); 1427 1428 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1429 if (file) { 1430 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1431 } 1432 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1438 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1439 { 1440 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1441 PetscErrorCode ierr; 1442 PetscMPIInt rank = aij->rank,size = aij->size; 1443 PetscBool isdraw,iascii,isbinary; 1444 PetscViewer sviewer; 1445 PetscViewerFormat format; 1446 1447 PetscFunctionBegin; 1448 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1449 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1450 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1451 if (iascii) { 1452 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1453 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1454 MatInfo info; 1455 PetscBool inodes; 1456 1457 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1458 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1459 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1460 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1461 if (!inodes) { 1462 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1463 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1464 } else { 1465 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1466 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1467 } 1468 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1469 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1470 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1472 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1473 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1474 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1475 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1476 PetscFunctionReturn(0); 1477 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1478 PetscInt inodecount,inodelimit,*inodes; 1479 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1480 if (inodes) { 1481 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1482 } else { 1483 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1484 } 1485 PetscFunctionReturn(0); 1486 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1487 PetscFunctionReturn(0); 1488 } 1489 } else if (isbinary) { 1490 if (size == 1) { 1491 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1492 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1493 } else { 1494 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1495 } 1496 PetscFunctionReturn(0); 1497 } else if (isdraw) { 1498 PetscDraw draw; 1499 PetscBool isnull; 1500 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1501 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1502 } 1503 1504 if (size == 1) { 1505 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1506 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1507 } else { 1508 /* assemble the entire matrix onto first processor. */ 1509 Mat A; 1510 Mat_SeqAIJ *Aloc; 1511 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1512 MatScalar *a; 1513 1514 if (mat->rmap->N > 1024) { 1515 PetscBool flg = PETSC_FALSE; 1516 1517 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1518 if (!flg) { 1519 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1520 } 1521 } 1522 1523 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1524 if (!rank) { 1525 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1526 } else { 1527 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1528 } 1529 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1530 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1531 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1532 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1533 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1534 1535 /* copy over the A part */ 1536 Aloc = (Mat_SeqAIJ*)aij->A->data; 1537 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1538 row = mat->rmap->rstart; 1539 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1540 for (i=0; i<m; i++) { 1541 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1542 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1543 } 1544 aj = Aloc->j; 1545 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1546 1547 /* copy over the B part */ 1548 Aloc = (Mat_SeqAIJ*)aij->B->data; 1549 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1550 row = mat->rmap->rstart; 1551 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1552 ct = cols; 1553 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1554 for (i=0; i<m; i++) { 1555 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1556 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1557 } 1558 ierr = PetscFree(ct);CHKERRQ(ierr); 1559 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1560 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1561 /* 1562 Everyone has to call to draw the matrix since the graphics waits are 1563 synchronized across all processors that share the PetscDraw object 1564 */ 1565 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1566 if (!rank) { 1567 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1568 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1569 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1570 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1571 } 1572 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1573 ierr = MatDestroy(&A);CHKERRQ(ierr); 1574 } 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatView_MPIAIJ" 1580 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1581 { 1582 PetscErrorCode ierr; 1583 PetscBool iascii,isdraw,issocket,isbinary; 1584 1585 PetscFunctionBegin; 1586 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1587 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1588 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1589 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1590 if (iascii || isdraw || isbinary || issocket) { 1591 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1592 } else { 1593 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1594 } 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatSOR_MPIAIJ" 1600 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1601 { 1602 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1603 PetscErrorCode ierr; 1604 Vec bb1 = 0; 1605 PetscBool hasop; 1606 1607 PetscFunctionBegin; 1608 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1609 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1610 } 1611 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1618 if (flag & SOR_ZERO_INITIAL_GUESS) { 1619 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1620 its--; 1621 } 1622 1623 while (its--) { 1624 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1626 1627 /* update rhs: bb1 = bb - B*x */ 1628 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1629 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1630 1631 /* local sweep */ 1632 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1633 } 1634 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1635 if (flag & SOR_ZERO_INITIAL_GUESS) { 1636 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1637 its--; 1638 } 1639 while (its--) { 1640 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 1643 /* update rhs: bb1 = bb - B*x */ 1644 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1645 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1646 1647 /* local sweep */ 1648 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1649 } 1650 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1651 if (flag & SOR_ZERO_INITIAL_GUESS) { 1652 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1653 its--; 1654 } 1655 while (its--) { 1656 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1657 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1658 1659 /* update rhs: bb1 = bb - B*x */ 1660 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1661 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1662 1663 /* local sweep */ 1664 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1665 } 1666 } else if (flag & SOR_EISENSTAT) { 1667 Vec xx1; 1668 1669 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1670 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1671 1672 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 if (!mat->diag) { 1675 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1676 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1677 } 1678 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1679 if (hasop) { 1680 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1681 } else { 1682 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1683 } 1684 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1685 1686 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1687 1688 /* local sweep */ 1689 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1690 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1691 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1692 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1693 1694 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatPermute_MPIAIJ" 1700 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1701 { 1702 MPI_Comm comm; 1703 PetscInt first,local_rowsize,local_colsize; 1704 const PetscInt *rows; 1705 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1706 PetscErrorCode ierr; 1707 1708 PetscFunctionBegin; 1709 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1710 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1711 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1712 /* collect the global row permutation and invert it */ 1713 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1714 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1715 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1716 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1717 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1718 /* get the local target indices */ 1719 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1720 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1721 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1722 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1723 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1724 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1725 /* the column permutation is so much easier; 1726 make a local version of 'colp' and invert it */ 1727 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1728 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1729 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1730 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1731 /* now we just get the submatrix */ 1732 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1733 /* clean up */ 1734 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1735 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1736 PetscFunctionReturn(0); 1737 } 1738 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1742 { 1743 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1744 Mat A = mat->A,B = mat->B; 1745 PetscErrorCode ierr; 1746 PetscReal isend[5],irecv[5]; 1747 1748 PetscFunctionBegin; 1749 info->block_size = 1.0; 1750 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1751 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1752 isend[3] = info->memory; isend[4] = info->mallocs; 1753 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1754 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1755 isend[3] += info->memory; isend[4] += info->mallocs; 1756 if (flag == MAT_LOCAL) { 1757 info->nz_used = isend[0]; 1758 info->nz_allocated = isend[1]; 1759 info->nz_unneeded = isend[2]; 1760 info->memory = isend[3]; 1761 info->mallocs = isend[4]; 1762 } else if (flag == MAT_GLOBAL_MAX) { 1763 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1764 info->nz_used = irecv[0]; 1765 info->nz_allocated = irecv[1]; 1766 info->nz_unneeded = irecv[2]; 1767 info->memory = irecv[3]; 1768 info->mallocs = irecv[4]; 1769 } else if (flag == MAT_GLOBAL_SUM) { 1770 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1771 info->nz_used = irecv[0]; 1772 info->nz_allocated = irecv[1]; 1773 info->nz_unneeded = irecv[2]; 1774 info->memory = irecv[3]; 1775 info->mallocs = irecv[4]; 1776 } 1777 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1778 info->fill_ratio_needed = 0; 1779 info->factor_mallocs = 0; 1780 1781 PetscFunctionReturn(0); 1782 } 1783 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "MatSetOption_MPIAIJ" 1786 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1787 { 1788 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1789 PetscErrorCode ierr; 1790 1791 PetscFunctionBegin; 1792 switch (op) { 1793 case MAT_NEW_NONZERO_LOCATIONS: 1794 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1795 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1796 case MAT_KEEP_NONZERO_PATTERN: 1797 case MAT_NEW_NONZERO_LOCATION_ERR: 1798 case MAT_USE_INODES: 1799 case MAT_IGNORE_ZERO_ENTRIES: 1800 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1801 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1802 break; 1803 case MAT_ROW_ORIENTED: 1804 a->roworiented = flg; 1805 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1806 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1807 break; 1808 case MAT_NEW_DIAGONALS: 1809 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1810 break; 1811 case MAT_IGNORE_OFF_PROC_ENTRIES: 1812 a->donotstash = flg; 1813 break; 1814 case MAT_SPD: 1815 A->spd_set = PETSC_TRUE; 1816 A->spd = flg; 1817 if (flg) { 1818 A->symmetric = PETSC_TRUE; 1819 A->structurally_symmetric = PETSC_TRUE; 1820 A->symmetric_set = PETSC_TRUE; 1821 A->structurally_symmetric_set = PETSC_TRUE; 1822 } 1823 break; 1824 case MAT_SYMMETRIC: 1825 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1826 break; 1827 case MAT_STRUCTURALLY_SYMMETRIC: 1828 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1829 break; 1830 case MAT_HERMITIAN: 1831 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1832 break; 1833 case MAT_SYMMETRY_ETERNAL: 1834 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1835 break; 1836 #if defined(PETSC_HAVE_TXPETSCGPU) 1837 // Sorry, I don't know how else to add MatOptions for data specific 1838 // to GPU classes without protecting against it in this case statement. 1839 // If anyone knows of a better way, let me know. Paul 1840 case MAT_DIAGBLOCK_CSR: 1841 case MAT_OFFDIAGBLOCK_CSR: 1842 case MAT_CSR: 1843 case MAT_DIAGBLOCK_DIA: 1844 case MAT_OFFDIAGBLOCK_DIA: 1845 case MAT_DIA: 1846 case MAT_DIAGBLOCK_ELL: 1847 case MAT_OFFDIAGBLOCK_ELL: 1848 case MAT_ELL: 1849 case MAT_DIAGBLOCK_HYB: 1850 case MAT_OFFDIAGBLOCK_HYB: 1851 case MAT_HYB: 1852 /* Not an error because MatSetOption_MPIAIJCUSP/CUSPARSE handle these options */ 1853 break; 1854 #endif 1855 default: 1856 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1857 } 1858 PetscFunctionReturn(0); 1859 } 1860 1861 #undef __FUNCT__ 1862 #define __FUNCT__ "MatGetRow_MPIAIJ" 1863 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1864 { 1865 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1866 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1867 PetscErrorCode ierr; 1868 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1869 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1870 PetscInt *cmap,*idx_p; 1871 1872 PetscFunctionBegin; 1873 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1874 mat->getrowactive = PETSC_TRUE; 1875 1876 if (!mat->rowvalues && (idx || v)) { 1877 /* 1878 allocate enough space to hold information from the longest row. 1879 */ 1880 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1881 PetscInt max = 1,tmp; 1882 for (i=0; i<matin->rmap->n; i++) { 1883 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1884 if (max < tmp) { max = tmp; } 1885 } 1886 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1887 } 1888 1889 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1890 lrow = row - rstart; 1891 1892 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1893 if (!v) {pvA = 0; pvB = 0;} 1894 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1895 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1896 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1897 nztot = nzA + nzB; 1898 1899 cmap = mat->garray; 1900 if (v || idx) { 1901 if (nztot) { 1902 /* Sort by increasing column numbers, assuming A and B already sorted */ 1903 PetscInt imark = -1; 1904 if (v) { 1905 *v = v_p = mat->rowvalues; 1906 for (i=0; i<nzB; i++) { 1907 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1908 else break; 1909 } 1910 imark = i; 1911 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1912 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1913 } 1914 if (idx) { 1915 *idx = idx_p = mat->rowindices; 1916 if (imark > -1) { 1917 for (i=0; i<imark; i++) { 1918 idx_p[i] = cmap[cworkB[i]]; 1919 } 1920 } else { 1921 for (i=0; i<nzB; i++) { 1922 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1923 else break; 1924 } 1925 imark = i; 1926 } 1927 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1928 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1929 } 1930 } else { 1931 if (idx) *idx = 0; 1932 if (v) *v = 0; 1933 } 1934 } 1935 *nz = nztot; 1936 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1937 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1938 PetscFunctionReturn(0); 1939 } 1940 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1943 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1944 { 1945 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1946 1947 PetscFunctionBegin; 1948 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1949 aij->getrowactive = PETSC_FALSE; 1950 PetscFunctionReturn(0); 1951 } 1952 1953 #undef __FUNCT__ 1954 #define __FUNCT__ "MatNorm_MPIAIJ" 1955 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1956 { 1957 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1958 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1959 PetscErrorCode ierr; 1960 PetscInt i,j,cstart = mat->cmap->rstart; 1961 PetscReal sum = 0.0; 1962 MatScalar *v; 1963 1964 PetscFunctionBegin; 1965 if (aij->size == 1) { 1966 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1967 } else { 1968 if (type == NORM_FROBENIUS) { 1969 v = amat->a; 1970 for (i=0; i<amat->nz; i++) { 1971 #if defined(PETSC_USE_COMPLEX) 1972 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1973 #else 1974 sum += (*v)*(*v); v++; 1975 #endif 1976 } 1977 v = bmat->a; 1978 for (i=0; i<bmat->nz; i++) { 1979 #if defined(PETSC_USE_COMPLEX) 1980 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1981 #else 1982 sum += (*v)*(*v); v++; 1983 #endif 1984 } 1985 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1986 *norm = PetscSqrtReal(*norm); 1987 } else if (type == NORM_1) { /* max column norm */ 1988 PetscReal *tmp,*tmp2; 1989 PetscInt *jj,*garray = aij->garray; 1990 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1991 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1992 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1993 *norm = 0.0; 1994 v = amat->a; jj = amat->j; 1995 for (j=0; j<amat->nz; j++) { 1996 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1997 } 1998 v = bmat->a; jj = bmat->j; 1999 for (j=0; j<bmat->nz; j++) { 2000 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2001 } 2002 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2003 for (j=0; j<mat->cmap->N; j++) { 2004 if (tmp2[j] > *norm) *norm = tmp2[j]; 2005 } 2006 ierr = PetscFree(tmp);CHKERRQ(ierr); 2007 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2008 } else if (type == NORM_INFINITY) { /* max row norm */ 2009 PetscReal ntemp = 0.0; 2010 for (j=0; j<aij->A->rmap->n; j++) { 2011 v = amat->a + amat->i[j]; 2012 sum = 0.0; 2013 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2014 sum += PetscAbsScalar(*v); v++; 2015 } 2016 v = bmat->a + bmat->i[j]; 2017 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2018 sum += PetscAbsScalar(*v); v++; 2019 } 2020 if (sum > ntemp) ntemp = sum; 2021 } 2022 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2023 } else { 2024 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2025 } 2026 } 2027 PetscFunctionReturn(0); 2028 } 2029 2030 #undef __FUNCT__ 2031 #define __FUNCT__ "MatTranspose_MPIAIJ" 2032 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2033 { 2034 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2035 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2036 PetscErrorCode ierr; 2037 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2038 PetscInt cstart=A->cmap->rstart,ncol; 2039 Mat B; 2040 MatScalar *array; 2041 2042 PetscFunctionBegin; 2043 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2044 2045 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2046 ai = Aloc->i; aj = Aloc->j; 2047 bi = Bloc->i; bj = Bloc->j; 2048 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2049 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2050 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2051 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2052 for (i=0; i<ai[ma]; i++){ 2053 d_nnz[aj[i]] ++; 2054 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2055 } 2056 2057 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2058 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2059 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2060 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2061 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2062 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2063 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2064 } else { 2065 B = *matout; 2066 } 2067 2068 /* copy over the A part */ 2069 array = Aloc->a; 2070 row = A->rmap->rstart; 2071 for (i=0; i<ma; i++) { 2072 ncol = ai[i+1]-ai[i]; 2073 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2074 row++; array += ncol; aj += ncol; 2075 } 2076 aj = Aloc->j; 2077 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2078 2079 /* copy over the B part */ 2080 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2081 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2082 array = Bloc->a; 2083 row = A->rmap->rstart; 2084 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2085 cols_tmp = cols; 2086 for (i=0; i<mb; i++) { 2087 ncol = bi[i+1]-bi[i]; 2088 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2089 row++; array += ncol; cols_tmp += ncol; 2090 } 2091 ierr = PetscFree(cols);CHKERRQ(ierr); 2092 2093 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2094 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2095 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2096 *matout = B; 2097 } else { 2098 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2099 } 2100 PetscFunctionReturn(0); 2101 } 2102 2103 #undef __FUNCT__ 2104 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2105 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2106 { 2107 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2108 Mat a = aij->A,b = aij->B; 2109 PetscErrorCode ierr; 2110 PetscInt s1,s2,s3; 2111 2112 PetscFunctionBegin; 2113 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2114 if (rr) { 2115 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2116 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2117 /* Overlap communication with computation. */ 2118 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2119 } 2120 if (ll) { 2121 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2122 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2123 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2124 } 2125 /* scale the diagonal block */ 2126 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2127 2128 if (rr) { 2129 /* Do a scatter end and then right scale the off-diagonal block */ 2130 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2131 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2132 } 2133 2134 PetscFunctionReturn(0); 2135 } 2136 2137 #undef __FUNCT__ 2138 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2139 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2140 { 2141 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2142 PetscErrorCode ierr; 2143 2144 PetscFunctionBegin; 2145 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2146 PetscFunctionReturn(0); 2147 } 2148 2149 #undef __FUNCT__ 2150 #define __FUNCT__ "MatEqual_MPIAIJ" 2151 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2152 { 2153 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2154 Mat a,b,c,d; 2155 PetscBool flg; 2156 PetscErrorCode ierr; 2157 2158 PetscFunctionBegin; 2159 a = matA->A; b = matA->B; 2160 c = matB->A; d = matB->B; 2161 2162 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2163 if (flg) { 2164 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2165 } 2166 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 #undef __FUNCT__ 2171 #define __FUNCT__ "MatCopy_MPIAIJ" 2172 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2173 { 2174 PetscErrorCode ierr; 2175 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2176 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2177 2178 PetscFunctionBegin; 2179 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2180 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2181 /* because of the column compression in the off-processor part of the matrix a->B, 2182 the number of columns in a->B and b->B may be different, hence we cannot call 2183 the MatCopy() directly on the two parts. If need be, we can provide a more 2184 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2185 then copying the submatrices */ 2186 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2187 } else { 2188 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2189 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2190 } 2191 PetscFunctionReturn(0); 2192 } 2193 2194 #undef __FUNCT__ 2195 #define __FUNCT__ "MatSetUp_MPIAIJ" 2196 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2197 { 2198 PetscErrorCode ierr; 2199 2200 PetscFunctionBegin; 2201 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2202 PetscFunctionReturn(0); 2203 } 2204 2205 #undef __FUNCT__ 2206 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2207 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2208 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2209 { 2210 PetscInt i,m=Y->rmap->N; 2211 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2212 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2213 const PetscInt *xi = x->i,*yi = y->i; 2214 2215 PetscFunctionBegin; 2216 /* Set the number of nonzeros in the new matrix */ 2217 for(i=0; i<m; i++) { 2218 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2219 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2220 nnz[i] = 0; 2221 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2222 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2223 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2224 nnz[i]++; 2225 } 2226 for (; k<nzy; k++) nnz[i]++; 2227 } 2228 PetscFunctionReturn(0); 2229 } 2230 2231 #undef __FUNCT__ 2232 #define __FUNCT__ "MatAXPY_MPIAIJ" 2233 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2234 { 2235 PetscErrorCode ierr; 2236 PetscInt i; 2237 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2238 PetscBLASInt bnz,one=1; 2239 Mat_SeqAIJ *x,*y; 2240 2241 PetscFunctionBegin; 2242 if (str == SAME_NONZERO_PATTERN) { 2243 PetscScalar alpha = a; 2244 x = (Mat_SeqAIJ *)xx->A->data; 2245 y = (Mat_SeqAIJ *)yy->A->data; 2246 bnz = PetscBLASIntCast(x->nz); 2247 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2248 x = (Mat_SeqAIJ *)xx->B->data; 2249 y = (Mat_SeqAIJ *)yy->B->data; 2250 bnz = PetscBLASIntCast(x->nz); 2251 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2252 } else if (str == SUBSET_NONZERO_PATTERN) { 2253 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2254 2255 x = (Mat_SeqAIJ *)xx->B->data; 2256 y = (Mat_SeqAIJ *)yy->B->data; 2257 if (y->xtoy && y->XtoY != xx->B) { 2258 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2259 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2260 } 2261 if (!y->xtoy) { /* get xtoy */ 2262 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2263 y->XtoY = xx->B; 2264 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2265 } 2266 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2267 } else { 2268 Mat B; 2269 PetscInt *nnz_d,*nnz_o; 2270 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2271 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2272 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2273 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2274 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2275 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2276 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2277 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2278 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2279 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2280 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2281 ierr = MatHeaderReplace(Y,B); 2282 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2283 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2284 } 2285 PetscFunctionReturn(0); 2286 } 2287 2288 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2289 2290 #undef __FUNCT__ 2291 #define __FUNCT__ "MatConjugate_MPIAIJ" 2292 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2293 { 2294 #if defined(PETSC_USE_COMPLEX) 2295 PetscErrorCode ierr; 2296 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2297 2298 PetscFunctionBegin; 2299 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2300 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2301 #else 2302 PetscFunctionBegin; 2303 #endif 2304 PetscFunctionReturn(0); 2305 } 2306 2307 #undef __FUNCT__ 2308 #define __FUNCT__ "MatRealPart_MPIAIJ" 2309 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2310 { 2311 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2312 PetscErrorCode ierr; 2313 2314 PetscFunctionBegin; 2315 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2316 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2317 PetscFunctionReturn(0); 2318 } 2319 2320 #undef __FUNCT__ 2321 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2322 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2323 { 2324 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2325 PetscErrorCode ierr; 2326 2327 PetscFunctionBegin; 2328 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2329 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2330 PetscFunctionReturn(0); 2331 } 2332 2333 #ifdef PETSC_HAVE_PBGL 2334 2335 #include <boost/parallel/mpi/bsp_process_group.hpp> 2336 #include <boost/graph/distributed/ilu_default_graph.hpp> 2337 #include <boost/graph/distributed/ilu_0_block.hpp> 2338 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2339 #include <boost/graph/distributed/petsc/interface.hpp> 2340 #include <boost/multi_array.hpp> 2341 #include <boost/parallel/distributed_property_map->hpp> 2342 2343 #undef __FUNCT__ 2344 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2345 /* 2346 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2347 */ 2348 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2349 { 2350 namespace petsc = boost::distributed::petsc; 2351 2352 namespace graph_dist = boost::graph::distributed; 2353 using boost::graph::distributed::ilu_default::process_group_type; 2354 using boost::graph::ilu_permuted; 2355 2356 PetscBool row_identity, col_identity; 2357 PetscContainer c; 2358 PetscInt m, n, M, N; 2359 PetscErrorCode ierr; 2360 2361 PetscFunctionBegin; 2362 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2363 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2364 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2365 if (!row_identity || !col_identity) { 2366 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2367 } 2368 2369 process_group_type pg; 2370 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2371 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2372 lgraph_type& level_graph = *lgraph_p; 2373 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2374 2375 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2376 ilu_permuted(level_graph); 2377 2378 /* put together the new matrix */ 2379 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2380 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2381 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2382 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2383 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2384 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2385 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2386 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2387 2388 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2389 ierr = PetscContainerSetPointer(c, lgraph_p); 2390 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2391 ierr = PetscContainerDestroy(&c); 2392 PetscFunctionReturn(0); 2393 } 2394 2395 #undef __FUNCT__ 2396 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2397 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2398 { 2399 PetscFunctionBegin; 2400 PetscFunctionReturn(0); 2401 } 2402 2403 #undef __FUNCT__ 2404 #define __FUNCT__ "MatSolve_MPIAIJ" 2405 /* 2406 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2407 */ 2408 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2409 { 2410 namespace graph_dist = boost::graph::distributed; 2411 2412 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2413 lgraph_type* lgraph_p; 2414 PetscContainer c; 2415 PetscErrorCode ierr; 2416 2417 PetscFunctionBegin; 2418 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2419 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2420 ierr = VecCopy(b, x);CHKERRQ(ierr); 2421 2422 PetscScalar* array_x; 2423 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2424 PetscInt sx; 2425 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2426 2427 PetscScalar* array_b; 2428 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2429 PetscInt sb; 2430 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2431 2432 lgraph_type& level_graph = *lgraph_p; 2433 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2434 2435 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2436 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2437 ref_x(array_x, boost::extents[num_vertices(graph)]); 2438 2439 typedef boost::iterator_property_map<array_ref_type::iterator, 2440 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2441 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2442 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2443 2444 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2445 2446 PetscFunctionReturn(0); 2447 } 2448 #endif 2449 2450 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2451 PetscInt nzlocal,nsends,nrecvs; 2452 PetscMPIInt *send_rank,*recv_rank; 2453 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2454 PetscScalar *sbuf_a,**rbuf_a; 2455 PetscErrorCode (*Destroy)(Mat); 2456 } Mat_Redundant; 2457 2458 #undef __FUNCT__ 2459 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2460 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2461 { 2462 PetscErrorCode ierr; 2463 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2464 PetscInt i; 2465 2466 PetscFunctionBegin; 2467 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2468 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2469 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2470 for (i=0; i<redund->nrecvs; i++){ 2471 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2472 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2473 } 2474 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2475 ierr = PetscFree(redund);CHKERRQ(ierr); 2476 PetscFunctionReturn(0); 2477 } 2478 2479 #undef __FUNCT__ 2480 #define __FUNCT__ "MatDestroy_MatRedundant" 2481 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2482 { 2483 PetscErrorCode ierr; 2484 PetscContainer container; 2485 Mat_Redundant *redund=PETSC_NULL; 2486 2487 PetscFunctionBegin; 2488 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2489 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2490 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2491 A->ops->destroy = redund->Destroy; 2492 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2493 if (A->ops->destroy) { 2494 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2495 } 2496 PetscFunctionReturn(0); 2497 } 2498 2499 #undef __FUNCT__ 2500 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2501 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2502 { 2503 PetscMPIInt rank,size; 2504 MPI_Comm comm=((PetscObject)mat)->comm; 2505 PetscErrorCode ierr; 2506 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2507 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2508 PetscInt *rowrange=mat->rmap->range; 2509 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2510 Mat A=aij->A,B=aij->B,C=*matredundant; 2511 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2512 PetscScalar *sbuf_a; 2513 PetscInt nzlocal=a->nz+b->nz; 2514 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2515 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2516 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2517 MatScalar *aworkA,*aworkB; 2518 PetscScalar *vals; 2519 PetscMPIInt tag1,tag2,tag3,imdex; 2520 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2521 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2522 MPI_Status recv_status,*send_status; 2523 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2524 PetscInt **rbuf_j=PETSC_NULL; 2525 PetscScalar **rbuf_a=PETSC_NULL; 2526 Mat_Redundant *redund=PETSC_NULL; 2527 PetscContainer container; 2528 2529 PetscFunctionBegin; 2530 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2531 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2532 2533 if (reuse == MAT_REUSE_MATRIX) { 2534 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2535 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2536 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2537 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2538 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2539 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2540 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2541 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2542 2543 nsends = redund->nsends; 2544 nrecvs = redund->nrecvs; 2545 send_rank = redund->send_rank; 2546 recv_rank = redund->recv_rank; 2547 sbuf_nz = redund->sbuf_nz; 2548 rbuf_nz = redund->rbuf_nz; 2549 sbuf_j = redund->sbuf_j; 2550 sbuf_a = redund->sbuf_a; 2551 rbuf_j = redund->rbuf_j; 2552 rbuf_a = redund->rbuf_a; 2553 } 2554 2555 if (reuse == MAT_INITIAL_MATRIX){ 2556 PetscMPIInt subrank,subsize; 2557 PetscInt nleftover,np_subcomm; 2558 /* get the destination processors' id send_rank, nsends and nrecvs */ 2559 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2560 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2561 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2562 np_subcomm = size/nsubcomm; 2563 nleftover = size - nsubcomm*np_subcomm; 2564 nsends = 0; nrecvs = 0; 2565 for (i=0; i<size; i++){ /* i=rank*/ 2566 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2567 send_rank[nsends] = i; nsends++; 2568 recv_rank[nrecvs++] = i; 2569 } 2570 } 2571 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2572 i = size-nleftover-1; 2573 j = 0; 2574 while (j < nsubcomm - nleftover){ 2575 send_rank[nsends++] = i; 2576 i--; j++; 2577 } 2578 } 2579 2580 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2581 for (i=0; i<nleftover; i++){ 2582 recv_rank[nrecvs++] = size-nleftover+i; 2583 } 2584 } 2585 2586 /* allocate sbuf_j, sbuf_a */ 2587 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2588 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2589 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2590 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2591 2592 /* copy mat's local entries into the buffers */ 2593 if (reuse == MAT_INITIAL_MATRIX){ 2594 rownz_max = 0; 2595 rptr = sbuf_j; 2596 cols = sbuf_j + rend-rstart + 1; 2597 vals = sbuf_a; 2598 rptr[0] = 0; 2599 for (i=0; i<rend-rstart; i++){ 2600 row = i + rstart; 2601 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2602 ncols = nzA + nzB; 2603 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2604 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2605 /* load the column indices for this row into cols */ 2606 lwrite = 0; 2607 for (l=0; l<nzB; l++) { 2608 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2609 vals[lwrite] = aworkB[l]; 2610 cols[lwrite++] = ctmp; 2611 } 2612 } 2613 for (l=0; l<nzA; l++){ 2614 vals[lwrite] = aworkA[l]; 2615 cols[lwrite++] = cstart + cworkA[l]; 2616 } 2617 for (l=0; l<nzB; l++) { 2618 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2619 vals[lwrite] = aworkB[l]; 2620 cols[lwrite++] = ctmp; 2621 } 2622 } 2623 vals += ncols; 2624 cols += ncols; 2625 rptr[i+1] = rptr[i] + ncols; 2626 if (rownz_max < ncols) rownz_max = ncols; 2627 } 2628 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2629 } else { /* only copy matrix values into sbuf_a */ 2630 rptr = sbuf_j; 2631 vals = sbuf_a; 2632 rptr[0] = 0; 2633 for (i=0; i<rend-rstart; i++){ 2634 row = i + rstart; 2635 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2636 ncols = nzA + nzB; 2637 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2638 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2639 lwrite = 0; 2640 for (l=0; l<nzB; l++) { 2641 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2642 } 2643 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2644 for (l=0; l<nzB; l++) { 2645 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2646 } 2647 vals += ncols; 2648 rptr[i+1] = rptr[i] + ncols; 2649 } 2650 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2651 2652 /* send nzlocal to others, and recv other's nzlocal */ 2653 /*--------------------------------------------------*/ 2654 if (reuse == MAT_INITIAL_MATRIX){ 2655 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2656 s_waits2 = s_waits3 + nsends; 2657 s_waits1 = s_waits2 + nsends; 2658 r_waits1 = s_waits1 + nsends; 2659 r_waits2 = r_waits1 + nrecvs; 2660 r_waits3 = r_waits2 + nrecvs; 2661 } else { 2662 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2663 r_waits3 = s_waits3 + nsends; 2664 } 2665 2666 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2667 if (reuse == MAT_INITIAL_MATRIX){ 2668 /* get new tags to keep the communication clean */ 2669 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2670 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2671 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2672 2673 /* post receives of other's nzlocal */ 2674 for (i=0; i<nrecvs; i++){ 2675 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2676 } 2677 /* send nzlocal to others */ 2678 for (i=0; i<nsends; i++){ 2679 sbuf_nz[i] = nzlocal; 2680 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2681 } 2682 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2683 count = nrecvs; 2684 while (count) { 2685 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2686 recv_rank[imdex] = recv_status.MPI_SOURCE; 2687 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2688 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2689 2690 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2691 rbuf_nz[imdex] += i + 2; 2692 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2693 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2694 count--; 2695 } 2696 /* wait on sends of nzlocal */ 2697 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2698 /* send mat->i,j to others, and recv from other's */ 2699 /*------------------------------------------------*/ 2700 for (i=0; i<nsends; i++){ 2701 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2702 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2703 } 2704 /* wait on receives of mat->i,j */ 2705 /*------------------------------*/ 2706 count = nrecvs; 2707 while (count) { 2708 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2709 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2710 count--; 2711 } 2712 /* wait on sends of mat->i,j */ 2713 /*---------------------------*/ 2714 if (nsends) { 2715 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2716 } 2717 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2718 2719 /* post receives, send and receive mat->a */ 2720 /*----------------------------------------*/ 2721 for (imdex=0; imdex<nrecvs; imdex++) { 2722 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2723 } 2724 for (i=0; i<nsends; i++){ 2725 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2726 } 2727 count = nrecvs; 2728 while (count) { 2729 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2730 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2731 count--; 2732 } 2733 if (nsends) { 2734 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2735 } 2736 2737 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2738 2739 /* create redundant matrix */ 2740 /*-------------------------*/ 2741 if (reuse == MAT_INITIAL_MATRIX){ 2742 /* compute rownz_max for preallocation */ 2743 for (imdex=0; imdex<nrecvs; imdex++){ 2744 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2745 rptr = rbuf_j[imdex]; 2746 for (i=0; i<j; i++){ 2747 ncols = rptr[i+1] - rptr[i]; 2748 if (rownz_max < ncols) rownz_max = ncols; 2749 } 2750 } 2751 2752 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2753 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2754 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2755 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2756 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2757 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2758 } else { 2759 C = *matredundant; 2760 } 2761 2762 /* insert local matrix entries */ 2763 rptr = sbuf_j; 2764 cols = sbuf_j + rend-rstart + 1; 2765 vals = sbuf_a; 2766 for (i=0; i<rend-rstart; i++){ 2767 row = i + rstart; 2768 ncols = rptr[i+1] - rptr[i]; 2769 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2770 vals += ncols; 2771 cols += ncols; 2772 } 2773 /* insert received matrix entries */ 2774 for (imdex=0; imdex<nrecvs; imdex++){ 2775 rstart = rowrange[recv_rank[imdex]]; 2776 rend = rowrange[recv_rank[imdex]+1]; 2777 rptr = rbuf_j[imdex]; 2778 cols = rbuf_j[imdex] + rend-rstart + 1; 2779 vals = rbuf_a[imdex]; 2780 for (i=0; i<rend-rstart; i++){ 2781 row = i + rstart; 2782 ncols = rptr[i+1] - rptr[i]; 2783 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2784 vals += ncols; 2785 cols += ncols; 2786 } 2787 } 2788 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2789 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2790 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2791 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2792 if (reuse == MAT_INITIAL_MATRIX) { 2793 PetscContainer container; 2794 *matredundant = C; 2795 /* create a supporting struct and attach it to C for reuse */ 2796 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2797 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2798 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2799 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2800 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2801 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2802 2803 redund->nzlocal = nzlocal; 2804 redund->nsends = nsends; 2805 redund->nrecvs = nrecvs; 2806 redund->send_rank = send_rank; 2807 redund->recv_rank = recv_rank; 2808 redund->sbuf_nz = sbuf_nz; 2809 redund->rbuf_nz = rbuf_nz; 2810 redund->sbuf_j = sbuf_j; 2811 redund->sbuf_a = sbuf_a; 2812 redund->rbuf_j = rbuf_j; 2813 redund->rbuf_a = rbuf_a; 2814 2815 redund->Destroy = C->ops->destroy; 2816 C->ops->destroy = MatDestroy_MatRedundant; 2817 } 2818 PetscFunctionReturn(0); 2819 } 2820 2821 #undef __FUNCT__ 2822 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2823 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2824 { 2825 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2826 PetscErrorCode ierr; 2827 PetscInt i,*idxb = 0; 2828 PetscScalar *va,*vb; 2829 Vec vtmp; 2830 2831 PetscFunctionBegin; 2832 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2833 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2834 if (idx) { 2835 for (i=0; i<A->rmap->n; i++) { 2836 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2837 } 2838 } 2839 2840 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2841 if (idx) { 2842 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2843 } 2844 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2845 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2846 2847 for (i=0; i<A->rmap->n; i++){ 2848 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2849 va[i] = vb[i]; 2850 if (idx) idx[i] = a->garray[idxb[i]]; 2851 } 2852 } 2853 2854 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2855 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2856 ierr = PetscFree(idxb);CHKERRQ(ierr); 2857 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2858 PetscFunctionReturn(0); 2859 } 2860 2861 #undef __FUNCT__ 2862 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2863 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2864 { 2865 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2866 PetscErrorCode ierr; 2867 PetscInt i,*idxb = 0; 2868 PetscScalar *va,*vb; 2869 Vec vtmp; 2870 2871 PetscFunctionBegin; 2872 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2873 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2874 if (idx) { 2875 for (i=0; i<A->cmap->n; i++) { 2876 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2877 } 2878 } 2879 2880 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2881 if (idx) { 2882 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2883 } 2884 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2885 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2886 2887 for (i=0; i<A->rmap->n; i++){ 2888 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2889 va[i] = vb[i]; 2890 if (idx) idx[i] = a->garray[idxb[i]]; 2891 } 2892 } 2893 2894 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2895 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2896 ierr = PetscFree(idxb);CHKERRQ(ierr); 2897 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2898 PetscFunctionReturn(0); 2899 } 2900 2901 #undef __FUNCT__ 2902 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2903 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2904 { 2905 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2906 PetscInt n = A->rmap->n; 2907 PetscInt cstart = A->cmap->rstart; 2908 PetscInt *cmap = mat->garray; 2909 PetscInt *diagIdx, *offdiagIdx; 2910 Vec diagV, offdiagV; 2911 PetscScalar *a, *diagA, *offdiagA; 2912 PetscInt r; 2913 PetscErrorCode ierr; 2914 2915 PetscFunctionBegin; 2916 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2917 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2918 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2919 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2920 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2921 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2922 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2923 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2924 for(r = 0; r < n; ++r) { 2925 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2926 a[r] = diagA[r]; 2927 idx[r] = cstart + diagIdx[r]; 2928 } else { 2929 a[r] = offdiagA[r]; 2930 idx[r] = cmap[offdiagIdx[r]]; 2931 } 2932 } 2933 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2934 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2935 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2936 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2937 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2938 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2939 PetscFunctionReturn(0); 2940 } 2941 2942 #undef __FUNCT__ 2943 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2944 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2945 { 2946 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2947 PetscInt n = A->rmap->n; 2948 PetscInt cstart = A->cmap->rstart; 2949 PetscInt *cmap = mat->garray; 2950 PetscInt *diagIdx, *offdiagIdx; 2951 Vec diagV, offdiagV; 2952 PetscScalar *a, *diagA, *offdiagA; 2953 PetscInt r; 2954 PetscErrorCode ierr; 2955 2956 PetscFunctionBegin; 2957 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2958 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2959 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2960 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2961 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2962 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2963 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2964 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2965 for(r = 0; r < n; ++r) { 2966 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2967 a[r] = diagA[r]; 2968 idx[r] = cstart + diagIdx[r]; 2969 } else { 2970 a[r] = offdiagA[r]; 2971 idx[r] = cmap[offdiagIdx[r]]; 2972 } 2973 } 2974 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2975 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2976 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2977 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2978 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2979 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2980 PetscFunctionReturn(0); 2981 } 2982 2983 #undef __FUNCT__ 2984 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2985 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2986 { 2987 PetscErrorCode ierr; 2988 Mat *dummy; 2989 2990 PetscFunctionBegin; 2991 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2992 *newmat = *dummy; 2993 ierr = PetscFree(dummy);CHKERRQ(ierr); 2994 PetscFunctionReturn(0); 2995 } 2996 2997 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2998 2999 #undef __FUNCT__ 3000 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3001 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3002 { 3003 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3004 PetscErrorCode ierr; 3005 3006 PetscFunctionBegin; 3007 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3008 PetscFunctionReturn(0); 3009 } 3010 3011 3012 /* -------------------------------------------------------------------*/ 3013 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3014 MatGetRow_MPIAIJ, 3015 MatRestoreRow_MPIAIJ, 3016 MatMult_MPIAIJ, 3017 /* 4*/ MatMultAdd_MPIAIJ, 3018 MatMultTranspose_MPIAIJ, 3019 MatMultTransposeAdd_MPIAIJ, 3020 #ifdef PETSC_HAVE_PBGL 3021 MatSolve_MPIAIJ, 3022 #else 3023 0, 3024 #endif 3025 0, 3026 0, 3027 /*10*/ 0, 3028 0, 3029 0, 3030 MatSOR_MPIAIJ, 3031 MatTranspose_MPIAIJ, 3032 /*15*/ MatGetInfo_MPIAIJ, 3033 MatEqual_MPIAIJ, 3034 MatGetDiagonal_MPIAIJ, 3035 MatDiagonalScale_MPIAIJ, 3036 MatNorm_MPIAIJ, 3037 /*20*/ MatAssemblyBegin_MPIAIJ, 3038 MatAssemblyEnd_MPIAIJ, 3039 MatSetOption_MPIAIJ, 3040 MatZeroEntries_MPIAIJ, 3041 /*24*/ MatZeroRows_MPIAIJ, 3042 0, 3043 #ifdef PETSC_HAVE_PBGL 3044 0, 3045 #else 3046 0, 3047 #endif 3048 0, 3049 0, 3050 /*29*/ MatSetUp_MPIAIJ, 3051 #ifdef PETSC_HAVE_PBGL 3052 0, 3053 #else 3054 0, 3055 #endif 3056 0, 3057 0, 3058 0, 3059 /*34*/ MatDuplicate_MPIAIJ, 3060 0, 3061 0, 3062 0, 3063 0, 3064 /*39*/ MatAXPY_MPIAIJ, 3065 MatGetSubMatrices_MPIAIJ, 3066 MatIncreaseOverlap_MPIAIJ, 3067 MatGetValues_MPIAIJ, 3068 MatCopy_MPIAIJ, 3069 /*44*/ MatGetRowMax_MPIAIJ, 3070 MatScale_MPIAIJ, 3071 0, 3072 0, 3073 MatZeroRowsColumns_MPIAIJ, 3074 /*49*/ 0, 3075 0, 3076 0, 3077 0, 3078 0, 3079 /*54*/ MatFDColoringCreate_MPIAIJ, 3080 0, 3081 MatSetUnfactored_MPIAIJ, 3082 0, /* MatPermute_MPIAIJ, impl currently broken */ 3083 0, 3084 /*59*/ MatGetSubMatrix_MPIAIJ, 3085 MatDestroy_MPIAIJ, 3086 MatView_MPIAIJ, 3087 0, 3088 0, 3089 /*64*/ 0, 3090 0, 3091 0, 3092 0, 3093 0, 3094 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3095 MatGetRowMinAbs_MPIAIJ, 3096 0, 3097 MatSetColoring_MPIAIJ, 3098 #if defined(PETSC_HAVE_ADIC) 3099 MatSetValuesAdic_MPIAIJ, 3100 #else 3101 0, 3102 #endif 3103 MatSetValuesAdifor_MPIAIJ, 3104 /*75*/ MatFDColoringApply_AIJ, 3105 0, 3106 0, 3107 0, 3108 0, 3109 /*80*/ 0, 3110 0, 3111 0, 3112 /*83*/ MatLoad_MPIAIJ, 3113 0, 3114 0, 3115 0, 3116 0, 3117 0, 3118 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3119 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3120 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3121 MatPtAP_Basic, 3122 MatPtAPSymbolic_MPIAIJ, 3123 /*94*/ MatPtAPNumeric_MPIAIJ, 3124 0, 3125 0, 3126 0, 3127 0, 3128 /*99*/ 0, 3129 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3130 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3131 MatConjugate_MPIAIJ, 3132 0, 3133 /*104*/MatSetValuesRow_MPIAIJ, 3134 MatRealPart_MPIAIJ, 3135 MatImaginaryPart_MPIAIJ, 3136 0, 3137 0, 3138 /*109*/0, 3139 MatGetRedundantMatrix_MPIAIJ, 3140 MatGetRowMin_MPIAIJ, 3141 0, 3142 0, 3143 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3144 0, 3145 0, 3146 0, 3147 0, 3148 /*119*/0, 3149 0, 3150 0, 3151 0, 3152 MatGetMultiProcBlock_MPIAIJ, 3153 /*124*/MatFindNonZeroRows_MPIAIJ, 3154 MatGetColumnNorms_MPIAIJ, 3155 MatInvertBlockDiagonal_MPIAIJ, 3156 0, 3157 MatGetSubMatricesParallel_MPIAIJ, 3158 /*129*/0, 3159 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3160 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3161 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3162 0, 3163 /*134*/0, 3164 0, 3165 0, 3166 0, 3167 0 3168 }; 3169 3170 /* ----------------------------------------------------------------------------------------*/ 3171 3172 EXTERN_C_BEGIN 3173 #undef __FUNCT__ 3174 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3175 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3176 { 3177 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3178 PetscErrorCode ierr; 3179 3180 PetscFunctionBegin; 3181 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3182 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3183 PetscFunctionReturn(0); 3184 } 3185 EXTERN_C_END 3186 3187 EXTERN_C_BEGIN 3188 #undef __FUNCT__ 3189 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3190 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3191 { 3192 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3193 PetscErrorCode ierr; 3194 3195 PetscFunctionBegin; 3196 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3197 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3198 PetscFunctionReturn(0); 3199 } 3200 EXTERN_C_END 3201 3202 EXTERN_C_BEGIN 3203 #undef __FUNCT__ 3204 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3205 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3206 { 3207 Mat_MPIAIJ *b; 3208 PetscErrorCode ierr; 3209 PetscInt i; 3210 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3211 3212 PetscFunctionBegin; 3213 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3214 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3215 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3216 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3217 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3218 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3219 3220 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3221 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3222 if (d_nnz) { 3223 for (i=0; i<B->rmap->n; i++) { 3224 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3225 } 3226 } 3227 if (o_nnz) { 3228 for (i=0; i<B->rmap->n; i++) { 3229 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3230 } 3231 } 3232 b = (Mat_MPIAIJ*)B->data; 3233 3234 if (!B->preallocated) { 3235 /* Explicitly create 2 MATSEQAIJ matrices. */ 3236 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3237 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3238 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3239 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3240 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3241 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3242 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3243 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3244 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3245 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3246 } 3247 3248 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3249 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3250 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3251 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3252 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3253 B->preallocated = PETSC_TRUE; 3254 PetscFunctionReturn(0); 3255 } 3256 EXTERN_C_END 3257 3258 #undef __FUNCT__ 3259 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3260 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3261 { 3262 Mat mat; 3263 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3264 PetscErrorCode ierr; 3265 3266 PetscFunctionBegin; 3267 *newmat = 0; 3268 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3269 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3270 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3271 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3272 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3273 a = (Mat_MPIAIJ*)mat->data; 3274 3275 mat->factortype = matin->factortype; 3276 mat->rmap->bs = matin->rmap->bs; 3277 mat->cmap->bs = matin->cmap->bs; 3278 mat->assembled = PETSC_TRUE; 3279 mat->insertmode = NOT_SET_VALUES; 3280 mat->preallocated = PETSC_TRUE; 3281 3282 a->size = oldmat->size; 3283 a->rank = oldmat->rank; 3284 a->donotstash = oldmat->donotstash; 3285 a->roworiented = oldmat->roworiented; 3286 a->rowindices = 0; 3287 a->rowvalues = 0; 3288 a->getrowactive = PETSC_FALSE; 3289 3290 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3291 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3292 3293 if (oldmat->colmap) { 3294 #if defined (PETSC_USE_CTABLE) 3295 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3296 #else 3297 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3298 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3299 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3300 #endif 3301 } else a->colmap = 0; 3302 if (oldmat->garray) { 3303 PetscInt len; 3304 len = oldmat->B->cmap->n; 3305 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3306 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3307 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3308 } else a->garray = 0; 3309 3310 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3311 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3312 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3313 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3314 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3315 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3316 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3317 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3318 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3319 *newmat = mat; 3320 PetscFunctionReturn(0); 3321 } 3322 3323 3324 3325 #undef __FUNCT__ 3326 #define __FUNCT__ "MatLoad_MPIAIJ" 3327 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3328 { 3329 PetscScalar *vals,*svals; 3330 MPI_Comm comm = ((PetscObject)viewer)->comm; 3331 PetscErrorCode ierr; 3332 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3333 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3334 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3335 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3336 PetscInt cend,cstart,n,*rowners,sizesset=1; 3337 int fd; 3338 PetscInt bs = 1; 3339 3340 PetscFunctionBegin; 3341 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3342 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3343 if (!rank) { 3344 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3345 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3346 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3347 } 3348 3349 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3350 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3351 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3352 3353 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3354 3355 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3356 M = header[1]; N = header[2]; 3357 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3358 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3359 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3360 3361 /* If global sizes are set, check if they are consistent with that given in the file */ 3362 if (sizesset) { 3363 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3364 } 3365 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3366 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3367 3368 /* determine ownership of all (block) rows */ 3369 if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3370 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3371 else m = newMat->rmap->n; /* Set by user */ 3372 3373 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3374 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3375 3376 /* First process needs enough room for process with most rows */ 3377 if (!rank) { 3378 mmax = rowners[1]; 3379 for (i=2; i<size; i++) { 3380 mmax = PetscMax(mmax,rowners[i]); 3381 } 3382 } else mmax = m; 3383 3384 rowners[0] = 0; 3385 for (i=2; i<=size; i++) { 3386 rowners[i] += rowners[i-1]; 3387 } 3388 rstart = rowners[rank]; 3389 rend = rowners[rank+1]; 3390 3391 /* distribute row lengths to all processors */ 3392 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3393 if (!rank) { 3394 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3395 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3396 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3397 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3398 for (j=0; j<m; j++) { 3399 procsnz[0] += ourlens[j]; 3400 } 3401 for (i=1; i<size; i++) { 3402 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3403 /* calculate the number of nonzeros on each processor */ 3404 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3405 procsnz[i] += rowlengths[j]; 3406 } 3407 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3408 } 3409 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3410 } else { 3411 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3412 } 3413 3414 if (!rank) { 3415 /* determine max buffer needed and allocate it */ 3416 maxnz = 0; 3417 for (i=0; i<size; i++) { 3418 maxnz = PetscMax(maxnz,procsnz[i]); 3419 } 3420 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3421 3422 /* read in my part of the matrix column indices */ 3423 nz = procsnz[0]; 3424 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3425 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3426 3427 /* read in every one elses and ship off */ 3428 for (i=1; i<size; i++) { 3429 nz = procsnz[i]; 3430 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3431 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3432 } 3433 ierr = PetscFree(cols);CHKERRQ(ierr); 3434 } else { 3435 /* determine buffer space needed for message */ 3436 nz = 0; 3437 for (i=0; i<m; i++) { 3438 nz += ourlens[i]; 3439 } 3440 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3441 3442 /* receive message of column indices*/ 3443 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3444 } 3445 3446 /* determine column ownership if matrix is not square */ 3447 if (N != M) { 3448 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3449 else n = newMat->cmap->n; 3450 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3451 cstart = cend - n; 3452 } else { 3453 cstart = rstart; 3454 cend = rend; 3455 n = cend - cstart; 3456 } 3457 3458 /* loop over local rows, determining number of off diagonal entries */ 3459 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3460 jj = 0; 3461 for (i=0; i<m; i++) { 3462 for (j=0; j<ourlens[i]; j++) { 3463 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3464 jj++; 3465 } 3466 } 3467 3468 for (i=0; i<m; i++) { 3469 ourlens[i] -= offlens[i]; 3470 } 3471 if (!sizesset) { 3472 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3473 } 3474 3475 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3476 3477 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3478 3479 for (i=0; i<m; i++) { 3480 ourlens[i] += offlens[i]; 3481 } 3482 3483 if (!rank) { 3484 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3485 3486 /* read in my part of the matrix numerical values */ 3487 nz = procsnz[0]; 3488 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3489 3490 /* insert into matrix */ 3491 jj = rstart; 3492 smycols = mycols; 3493 svals = vals; 3494 for (i=0; i<m; i++) { 3495 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3496 smycols += ourlens[i]; 3497 svals += ourlens[i]; 3498 jj++; 3499 } 3500 3501 /* read in other processors and ship out */ 3502 for (i=1; i<size; i++) { 3503 nz = procsnz[i]; 3504 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3505 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3506 } 3507 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3508 } else { 3509 /* receive numeric values */ 3510 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3511 3512 /* receive message of values*/ 3513 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3514 3515 /* insert into matrix */ 3516 jj = rstart; 3517 smycols = mycols; 3518 svals = vals; 3519 for (i=0; i<m; i++) { 3520 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3521 smycols += ourlens[i]; 3522 svals += ourlens[i]; 3523 jj++; 3524 } 3525 } 3526 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3527 ierr = PetscFree(vals);CHKERRQ(ierr); 3528 ierr = PetscFree(mycols);CHKERRQ(ierr); 3529 ierr = PetscFree(rowners);CHKERRQ(ierr); 3530 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3531 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3532 3533 PetscFunctionReturn(0); 3534 } 3535 3536 #undef __FUNCT__ 3537 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3538 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3539 { 3540 PetscErrorCode ierr; 3541 IS iscol_local; 3542 PetscInt csize; 3543 3544 PetscFunctionBegin; 3545 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3546 if (call == MAT_REUSE_MATRIX) { 3547 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3548 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3549 } else { 3550 PetscInt cbs; 3551 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3552 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3553 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3554 } 3555 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3556 if (call == MAT_INITIAL_MATRIX) { 3557 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3558 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3559 } 3560 PetscFunctionReturn(0); 3561 } 3562 3563 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3564 #undef __FUNCT__ 3565 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3566 /* 3567 Not great since it makes two copies of the submatrix, first an SeqAIJ 3568 in local and then by concatenating the local matrices the end result. 3569 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3570 3571 Note: This requires a sequential iscol with all indices. 3572 */ 3573 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3574 { 3575 PetscErrorCode ierr; 3576 PetscMPIInt rank,size; 3577 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3578 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3579 PetscBool allcolumns, colflag; 3580 Mat M,Mreuse; 3581 MatScalar *vwork,*aa; 3582 MPI_Comm comm = ((PetscObject)mat)->comm; 3583 Mat_SeqAIJ *aij; 3584 3585 3586 PetscFunctionBegin; 3587 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3588 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3589 3590 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3591 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3592 if (colflag && ncol == mat->cmap->N){ 3593 allcolumns = PETSC_TRUE; 3594 } else { 3595 allcolumns = PETSC_FALSE; 3596 } 3597 if (call == MAT_REUSE_MATRIX) { 3598 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3599 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3600 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3601 } else { 3602 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3603 } 3604 3605 /* 3606 m - number of local rows 3607 n - number of columns (same on all processors) 3608 rstart - first row in new global matrix generated 3609 */ 3610 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3611 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3612 if (call == MAT_INITIAL_MATRIX) { 3613 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3614 ii = aij->i; 3615 jj = aij->j; 3616 3617 /* 3618 Determine the number of non-zeros in the diagonal and off-diagonal 3619 portions of the matrix in order to do correct preallocation 3620 */ 3621 3622 /* first get start and end of "diagonal" columns */ 3623 if (csize == PETSC_DECIDE) { 3624 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3625 if (mglobal == n) { /* square matrix */ 3626 nlocal = m; 3627 } else { 3628 nlocal = n/size + ((n % size) > rank); 3629 } 3630 } else { 3631 nlocal = csize; 3632 } 3633 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3634 rstart = rend - nlocal; 3635 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3636 3637 /* next, compute all the lengths */ 3638 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3639 olens = dlens + m; 3640 for (i=0; i<m; i++) { 3641 jend = ii[i+1] - ii[i]; 3642 olen = 0; 3643 dlen = 0; 3644 for (j=0; j<jend; j++) { 3645 if (*jj < rstart || *jj >= rend) olen++; 3646 else dlen++; 3647 jj++; 3648 } 3649 olens[i] = olen; 3650 dlens[i] = dlen; 3651 } 3652 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3653 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3654 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3655 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3656 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3657 ierr = PetscFree(dlens);CHKERRQ(ierr); 3658 } else { 3659 PetscInt ml,nl; 3660 3661 M = *newmat; 3662 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3663 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3664 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3665 /* 3666 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3667 rather than the slower MatSetValues(). 3668 */ 3669 M->was_assembled = PETSC_TRUE; 3670 M->assembled = PETSC_FALSE; 3671 } 3672 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3673 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3674 ii = aij->i; 3675 jj = aij->j; 3676 aa = aij->a; 3677 for (i=0; i<m; i++) { 3678 row = rstart + i; 3679 nz = ii[i+1] - ii[i]; 3680 cwork = jj; jj += nz; 3681 vwork = aa; aa += nz; 3682 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3683 } 3684 3685 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3686 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3687 *newmat = M; 3688 3689 /* save submatrix used in processor for next request */ 3690 if (call == MAT_INITIAL_MATRIX) { 3691 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3692 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3693 } 3694 3695 PetscFunctionReturn(0); 3696 } 3697 3698 EXTERN_C_BEGIN 3699 #undef __FUNCT__ 3700 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3701 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3702 { 3703 PetscInt m,cstart, cend,j,nnz,i,d; 3704 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3705 const PetscInt *JJ; 3706 PetscScalar *values; 3707 PetscErrorCode ierr; 3708 3709 PetscFunctionBegin; 3710 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3711 3712 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3713 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3714 m = B->rmap->n; 3715 cstart = B->cmap->rstart; 3716 cend = B->cmap->rend; 3717 rstart = B->rmap->rstart; 3718 3719 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3720 3721 #if defined(PETSC_USE_DEBUGGING) 3722 for (i=0; i<m; i++) { 3723 nnz = Ii[i+1]- Ii[i]; 3724 JJ = J + Ii[i]; 3725 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3726 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3727 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3728 } 3729 #endif 3730 3731 for (i=0; i<m; i++) { 3732 nnz = Ii[i+1]- Ii[i]; 3733 JJ = J + Ii[i]; 3734 nnz_max = PetscMax(nnz_max,nnz); 3735 d = 0; 3736 for (j=0; j<nnz; j++) { 3737 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3738 } 3739 d_nnz[i] = d; 3740 o_nnz[i] = nnz - d; 3741 } 3742 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3743 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3744 3745 if (v) values = (PetscScalar*)v; 3746 else { 3747 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3748 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3749 } 3750 3751 for (i=0; i<m; i++) { 3752 ii = i + rstart; 3753 nnz = Ii[i+1]- Ii[i]; 3754 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3755 } 3756 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3757 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3758 3759 if (!v) { 3760 ierr = PetscFree(values);CHKERRQ(ierr); 3761 } 3762 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3763 PetscFunctionReturn(0); 3764 } 3765 EXTERN_C_END 3766 3767 #undef __FUNCT__ 3768 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3769 /*@ 3770 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3771 (the default parallel PETSc format). 3772 3773 Collective on MPI_Comm 3774 3775 Input Parameters: 3776 + B - the matrix 3777 . i - the indices into j for the start of each local row (starts with zero) 3778 . j - the column indices for each local row (starts with zero) 3779 - v - optional values in the matrix 3780 3781 Level: developer 3782 3783 Notes: 3784 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3785 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3786 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3787 3788 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3789 3790 The format which is used for the sparse matrix input, is equivalent to a 3791 row-major ordering.. i.e for the following matrix, the input data expected is 3792 as shown: 3793 3794 1 0 0 3795 2 0 3 P0 3796 ------- 3797 4 5 6 P1 3798 3799 Process0 [P0]: rows_owned=[0,1] 3800 i = {0,1,3} [size = nrow+1 = 2+1] 3801 j = {0,0,2} [size = nz = 6] 3802 v = {1,2,3} [size = nz = 6] 3803 3804 Process1 [P1]: rows_owned=[2] 3805 i = {0,3} [size = nrow+1 = 1+1] 3806 j = {0,1,2} [size = nz = 6] 3807 v = {4,5,6} [size = nz = 6] 3808 3809 .keywords: matrix, aij, compressed row, sparse, parallel 3810 3811 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3812 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3813 @*/ 3814 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3815 { 3816 PetscErrorCode ierr; 3817 3818 PetscFunctionBegin; 3819 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3820 PetscFunctionReturn(0); 3821 } 3822 3823 #undef __FUNCT__ 3824 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3825 /*@C 3826 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3827 (the default parallel PETSc format). For good matrix assembly performance 3828 the user should preallocate the matrix storage by setting the parameters 3829 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3830 performance can be increased by more than a factor of 50. 3831 3832 Collective on MPI_Comm 3833 3834 Input Parameters: 3835 + A - the matrix 3836 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3837 (same value is used for all local rows) 3838 . d_nnz - array containing the number of nonzeros in the various rows of the 3839 DIAGONAL portion of the local submatrix (possibly different for each row) 3840 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3841 The size of this array is equal to the number of local rows, i.e 'm'. 3842 For matrices that will be factored, you must leave room for (and set) 3843 the diagonal entry even if it is zero. 3844 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3845 submatrix (same value is used for all local rows). 3846 - o_nnz - array containing the number of nonzeros in the various rows of the 3847 OFF-DIAGONAL portion of the local submatrix (possibly different for 3848 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3849 structure. The size of this array is equal to the number 3850 of local rows, i.e 'm'. 3851 3852 If the *_nnz parameter is given then the *_nz parameter is ignored 3853 3854 The AIJ format (also called the Yale sparse matrix format or 3855 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3856 storage. The stored row and column indices begin with zero. 3857 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3858 3859 The parallel matrix is partitioned such that the first m0 rows belong to 3860 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3861 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3862 3863 The DIAGONAL portion of the local submatrix of a processor can be defined 3864 as the submatrix which is obtained by extraction the part corresponding to 3865 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3866 first row that belongs to the processor, r2 is the last row belonging to 3867 the this processor, and c1-c2 is range of indices of the local part of a 3868 vector suitable for applying the matrix to. This is an mxn matrix. In the 3869 common case of a square matrix, the row and column ranges are the same and 3870 the DIAGONAL part is also square. The remaining portion of the local 3871 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3872 3873 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3874 3875 You can call MatGetInfo() to get information on how effective the preallocation was; 3876 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3877 You can also run with the option -info and look for messages with the string 3878 malloc in them to see if additional memory allocation was needed. 3879 3880 Example usage: 3881 3882 Consider the following 8x8 matrix with 34 non-zero values, that is 3883 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3884 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3885 as follows: 3886 3887 .vb 3888 1 2 0 | 0 3 0 | 0 4 3889 Proc0 0 5 6 | 7 0 0 | 8 0 3890 9 0 10 | 11 0 0 | 12 0 3891 ------------------------------------- 3892 13 0 14 | 15 16 17 | 0 0 3893 Proc1 0 18 0 | 19 20 21 | 0 0 3894 0 0 0 | 22 23 0 | 24 0 3895 ------------------------------------- 3896 Proc2 25 26 27 | 0 0 28 | 29 0 3897 30 0 0 | 31 32 33 | 0 34 3898 .ve 3899 3900 This can be represented as a collection of submatrices as: 3901 3902 .vb 3903 A B C 3904 D E F 3905 G H I 3906 .ve 3907 3908 Where the submatrices A,B,C are owned by proc0, D,E,F are 3909 owned by proc1, G,H,I are owned by proc2. 3910 3911 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3912 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3913 The 'M','N' parameters are 8,8, and have the same values on all procs. 3914 3915 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3916 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3917 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3918 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3919 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3920 matrix, ans [DF] as another SeqAIJ matrix. 3921 3922 When d_nz, o_nz parameters are specified, d_nz storage elements are 3923 allocated for every row of the local diagonal submatrix, and o_nz 3924 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3925 One way to choose d_nz and o_nz is to use the max nonzerors per local 3926 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3927 In this case, the values of d_nz,o_nz are: 3928 .vb 3929 proc0 : dnz = 2, o_nz = 2 3930 proc1 : dnz = 3, o_nz = 2 3931 proc2 : dnz = 1, o_nz = 4 3932 .ve 3933 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3934 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3935 for proc3. i.e we are using 12+15+10=37 storage locations to store 3936 34 values. 3937 3938 When d_nnz, o_nnz parameters are specified, the storage is specified 3939 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3940 In the above case the values for d_nnz,o_nnz are: 3941 .vb 3942 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3943 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3944 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3945 .ve 3946 Here the space allocated is sum of all the above values i.e 34, and 3947 hence pre-allocation is perfect. 3948 3949 Level: intermediate 3950 3951 .keywords: matrix, aij, compressed row, sparse, parallel 3952 3953 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3954 MPIAIJ, MatGetInfo() 3955 @*/ 3956 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3957 { 3958 PetscErrorCode ierr; 3959 3960 PetscFunctionBegin; 3961 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3962 PetscValidType(B,1); 3963 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3964 PetscFunctionReturn(0); 3965 } 3966 3967 #undef __FUNCT__ 3968 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3969 /*@ 3970 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3971 CSR format the local rows. 3972 3973 Collective on MPI_Comm 3974 3975 Input Parameters: 3976 + comm - MPI communicator 3977 . m - number of local rows (Cannot be PETSC_DECIDE) 3978 . n - This value should be the same as the local size used in creating the 3979 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3980 calculated if N is given) For square matrices n is almost always m. 3981 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3982 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3983 . i - row indices 3984 . j - column indices 3985 - a - matrix values 3986 3987 Output Parameter: 3988 . mat - the matrix 3989 3990 Level: intermediate 3991 3992 Notes: 3993 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3994 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3995 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3996 3997 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3998 3999 The format which is used for the sparse matrix input, is equivalent to a 4000 row-major ordering.. i.e for the following matrix, the input data expected is 4001 as shown: 4002 4003 1 0 0 4004 2 0 3 P0 4005 ------- 4006 4 5 6 P1 4007 4008 Process0 [P0]: rows_owned=[0,1] 4009 i = {0,1,3} [size = nrow+1 = 2+1] 4010 j = {0,0,2} [size = nz = 6] 4011 v = {1,2,3} [size = nz = 6] 4012 4013 Process1 [P1]: rows_owned=[2] 4014 i = {0,3} [size = nrow+1 = 1+1] 4015 j = {0,1,2} [size = nz = 6] 4016 v = {4,5,6} [size = nz = 6] 4017 4018 .keywords: matrix, aij, compressed row, sparse, parallel 4019 4020 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4021 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4022 @*/ 4023 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4024 { 4025 PetscErrorCode ierr; 4026 4027 PetscFunctionBegin; 4028 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4029 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4030 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4031 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4032 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 4033 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4034 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4035 PetscFunctionReturn(0); 4036 } 4037 4038 #undef __FUNCT__ 4039 #define __FUNCT__ "MatCreateAIJ" 4040 /*@C 4041 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4042 (the default parallel PETSc format). For good matrix assembly performance 4043 the user should preallocate the matrix storage by setting the parameters 4044 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4045 performance can be increased by more than a factor of 50. 4046 4047 Collective on MPI_Comm 4048 4049 Input Parameters: 4050 + comm - MPI communicator 4051 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4052 This value should be the same as the local size used in creating the 4053 y vector for the matrix-vector product y = Ax. 4054 . n - This value should be the same as the local size used in creating the 4055 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4056 calculated if N is given) For square matrices n is almost always m. 4057 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4058 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4059 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4060 (same value is used for all local rows) 4061 . d_nnz - array containing the number of nonzeros in the various rows of the 4062 DIAGONAL portion of the local submatrix (possibly different for each row) 4063 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4064 The size of this array is equal to the number of local rows, i.e 'm'. 4065 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4066 submatrix (same value is used for all local rows). 4067 - o_nnz - array containing the number of nonzeros in the various rows of the 4068 OFF-DIAGONAL portion of the local submatrix (possibly different for 4069 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4070 structure. The size of this array is equal to the number 4071 of local rows, i.e 'm'. 4072 4073 Output Parameter: 4074 . A - the matrix 4075 4076 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4077 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4078 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4079 4080 Notes: 4081 If the *_nnz parameter is given then the *_nz parameter is ignored 4082 4083 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4084 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4085 storage requirements for this matrix. 4086 4087 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4088 processor than it must be used on all processors that share the object for 4089 that argument. 4090 4091 The user MUST specify either the local or global matrix dimensions 4092 (possibly both). 4093 4094 The parallel matrix is partitioned across processors such that the 4095 first m0 rows belong to process 0, the next m1 rows belong to 4096 process 1, the next m2 rows belong to process 2 etc.. where 4097 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4098 values corresponding to [m x N] submatrix. 4099 4100 The columns are logically partitioned with the n0 columns belonging 4101 to 0th partition, the next n1 columns belonging to the next 4102 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4103 4104 The DIAGONAL portion of the local submatrix on any given processor 4105 is the submatrix corresponding to the rows and columns m,n 4106 corresponding to the given processor. i.e diagonal matrix on 4107 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4108 etc. The remaining portion of the local submatrix [m x (N-n)] 4109 constitute the OFF-DIAGONAL portion. The example below better 4110 illustrates this concept. 4111 4112 For a square global matrix we define each processor's diagonal portion 4113 to be its local rows and the corresponding columns (a square submatrix); 4114 each processor's off-diagonal portion encompasses the remainder of the 4115 local matrix (a rectangular submatrix). 4116 4117 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4118 4119 When calling this routine with a single process communicator, a matrix of 4120 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4121 type of communicator, use the construction mechanism: 4122 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4123 4124 By default, this format uses inodes (identical nodes) when possible. 4125 We search for consecutive rows with the same nonzero structure, thereby 4126 reusing matrix information to achieve increased efficiency. 4127 4128 Options Database Keys: 4129 + -mat_no_inode - Do not use inodes 4130 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4131 - -mat_aij_oneindex - Internally use indexing starting at 1 4132 rather than 0. Note that when calling MatSetValues(), 4133 the user still MUST index entries starting at 0! 4134 4135 4136 Example usage: 4137 4138 Consider the following 8x8 matrix with 34 non-zero values, that is 4139 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4140 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4141 as follows: 4142 4143 .vb 4144 1 2 0 | 0 3 0 | 0 4 4145 Proc0 0 5 6 | 7 0 0 | 8 0 4146 9 0 10 | 11 0 0 | 12 0 4147 ------------------------------------- 4148 13 0 14 | 15 16 17 | 0 0 4149 Proc1 0 18 0 | 19 20 21 | 0 0 4150 0 0 0 | 22 23 0 | 24 0 4151 ------------------------------------- 4152 Proc2 25 26 27 | 0 0 28 | 29 0 4153 30 0 0 | 31 32 33 | 0 34 4154 .ve 4155 4156 This can be represented as a collection of submatrices as: 4157 4158 .vb 4159 A B C 4160 D E F 4161 G H I 4162 .ve 4163 4164 Where the submatrices A,B,C are owned by proc0, D,E,F are 4165 owned by proc1, G,H,I are owned by proc2. 4166 4167 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4168 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4169 The 'M','N' parameters are 8,8, and have the same values on all procs. 4170 4171 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4172 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4173 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4174 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4175 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4176 matrix, ans [DF] as another SeqAIJ matrix. 4177 4178 When d_nz, o_nz parameters are specified, d_nz storage elements are 4179 allocated for every row of the local diagonal submatrix, and o_nz 4180 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4181 One way to choose d_nz and o_nz is to use the max nonzerors per local 4182 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4183 In this case, the values of d_nz,o_nz are: 4184 .vb 4185 proc0 : dnz = 2, o_nz = 2 4186 proc1 : dnz = 3, o_nz = 2 4187 proc2 : dnz = 1, o_nz = 4 4188 .ve 4189 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4190 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4191 for proc3. i.e we are using 12+15+10=37 storage locations to store 4192 34 values. 4193 4194 When d_nnz, o_nnz parameters are specified, the storage is specified 4195 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4196 In the above case the values for d_nnz,o_nnz are: 4197 .vb 4198 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4199 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4200 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4201 .ve 4202 Here the space allocated is sum of all the above values i.e 34, and 4203 hence pre-allocation is perfect. 4204 4205 Level: intermediate 4206 4207 .keywords: matrix, aij, compressed row, sparse, parallel 4208 4209 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4210 MPIAIJ, MatCreateMPIAIJWithArrays() 4211 @*/ 4212 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4213 { 4214 PetscErrorCode ierr; 4215 PetscMPIInt size; 4216 4217 PetscFunctionBegin; 4218 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4219 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4220 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4221 if (size > 1) { 4222 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4223 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4224 } else { 4225 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4226 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4227 } 4228 PetscFunctionReturn(0); 4229 } 4230 4231 #undef __FUNCT__ 4232 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4233 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4234 { 4235 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4236 4237 PetscFunctionBegin; 4238 *Ad = a->A; 4239 *Ao = a->B; 4240 *colmap = a->garray; 4241 PetscFunctionReturn(0); 4242 } 4243 4244 #undef __FUNCT__ 4245 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4246 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4247 { 4248 PetscErrorCode ierr; 4249 PetscInt i; 4250 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4251 4252 PetscFunctionBegin; 4253 if (coloring->ctype == IS_COLORING_GLOBAL) { 4254 ISColoringValue *allcolors,*colors; 4255 ISColoring ocoloring; 4256 4257 /* set coloring for diagonal portion */ 4258 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4259 4260 /* set coloring for off-diagonal portion */ 4261 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4262 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4263 for (i=0; i<a->B->cmap->n; i++) { 4264 colors[i] = allcolors[a->garray[i]]; 4265 } 4266 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4267 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4268 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4269 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4270 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4271 ISColoringValue *colors; 4272 PetscInt *larray; 4273 ISColoring ocoloring; 4274 4275 /* set coloring for diagonal portion */ 4276 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4277 for (i=0; i<a->A->cmap->n; i++) { 4278 larray[i] = i + A->cmap->rstart; 4279 } 4280 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4281 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4282 for (i=0; i<a->A->cmap->n; i++) { 4283 colors[i] = coloring->colors[larray[i]]; 4284 } 4285 ierr = PetscFree(larray);CHKERRQ(ierr); 4286 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4287 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4288 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4289 4290 /* set coloring for off-diagonal portion */ 4291 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4292 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4293 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4294 for (i=0; i<a->B->cmap->n; i++) { 4295 colors[i] = coloring->colors[larray[i]]; 4296 } 4297 ierr = PetscFree(larray);CHKERRQ(ierr); 4298 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4299 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4300 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4301 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4302 4303 PetscFunctionReturn(0); 4304 } 4305 4306 #if defined(PETSC_HAVE_ADIC) 4307 #undef __FUNCT__ 4308 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4309 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4310 { 4311 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4312 PetscErrorCode ierr; 4313 4314 PetscFunctionBegin; 4315 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4316 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4317 PetscFunctionReturn(0); 4318 } 4319 #endif 4320 4321 #undef __FUNCT__ 4322 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4323 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4324 { 4325 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4326 PetscErrorCode ierr; 4327 4328 PetscFunctionBegin; 4329 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4330 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4331 PetscFunctionReturn(0); 4332 } 4333 4334 #undef __FUNCT__ 4335 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4336 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4337 { 4338 PetscErrorCode ierr; 4339 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4340 PetscInt *indx; 4341 4342 PetscFunctionBegin; 4343 /* This routine will ONLY return MPIAIJ type matrix */ 4344 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4345 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4346 if (n == PETSC_DECIDE){ 4347 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4348 } 4349 /* Check sum(n) = N */ 4350 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4351 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4352 4353 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4354 rstart -= m; 4355 4356 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4357 for (i=0;i<m;i++) { 4358 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4359 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4360 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4361 } 4362 4363 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4364 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4365 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4366 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4367 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4368 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4369 PetscFunctionReturn(0); 4370 } 4371 4372 #undef __FUNCT__ 4373 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4374 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4375 { 4376 PetscErrorCode ierr; 4377 PetscInt m,N,i,rstart,nnz,Ii; 4378 PetscInt *indx; 4379 PetscScalar *values; 4380 4381 PetscFunctionBegin; 4382 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4383 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4384 for (i=0;i<m;i++) { 4385 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4386 Ii = i + rstart; 4387 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4388 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4389 } 4390 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4391 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4392 PetscFunctionReturn(0); 4393 } 4394 4395 #undef __FUNCT__ 4396 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4397 /*@ 4398 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4399 matrices from each processor 4400 4401 Collective on MPI_Comm 4402 4403 Input Parameters: 4404 + comm - the communicators the parallel matrix will live on 4405 . inmat - the input sequential matrices 4406 . n - number of local columns (or PETSC_DECIDE) 4407 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4408 4409 Output Parameter: 4410 . outmat - the parallel matrix generated 4411 4412 Level: advanced 4413 4414 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4415 4416 @*/ 4417 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4418 { 4419 PetscErrorCode ierr; 4420 4421 PetscFunctionBegin; 4422 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4423 if (scall == MAT_INITIAL_MATRIX){ 4424 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4425 } 4426 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4427 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4428 PetscFunctionReturn(0); 4429 } 4430 4431 #undef __FUNCT__ 4432 #define __FUNCT__ "MatFileSplit" 4433 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4434 { 4435 PetscErrorCode ierr; 4436 PetscMPIInt rank; 4437 PetscInt m,N,i,rstart,nnz; 4438 size_t len; 4439 const PetscInt *indx; 4440 PetscViewer out; 4441 char *name; 4442 Mat B; 4443 const PetscScalar *values; 4444 4445 PetscFunctionBegin; 4446 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4447 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4448 /* Should this be the type of the diagonal block of A? */ 4449 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4450 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4451 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4452 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4453 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4454 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4455 for (i=0;i<m;i++) { 4456 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4457 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4458 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4459 } 4460 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4461 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4462 4463 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4464 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4465 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4466 sprintf(name,"%s.%d",outfile,rank); 4467 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4468 ierr = PetscFree(name); 4469 ierr = MatView(B,out);CHKERRQ(ierr); 4470 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4471 ierr = MatDestroy(&B);CHKERRQ(ierr); 4472 PetscFunctionReturn(0); 4473 } 4474 4475 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4476 #undef __FUNCT__ 4477 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4478 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4479 { 4480 PetscErrorCode ierr; 4481 Mat_Merge_SeqsToMPI *merge; 4482 PetscContainer container; 4483 4484 PetscFunctionBegin; 4485 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4486 if (container) { 4487 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4488 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4489 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4490 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4491 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4492 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4493 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4494 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4500 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4501 ierr = PetscFree(merge);CHKERRQ(ierr); 4502 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4503 } 4504 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4505 PetscFunctionReturn(0); 4506 } 4507 4508 #include <../src/mat/utils/freespace.h> 4509 #include <petscbt.h> 4510 4511 #undef __FUNCT__ 4512 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4513 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4514 { 4515 PetscErrorCode ierr; 4516 MPI_Comm comm=((PetscObject)mpimat)->comm; 4517 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4518 PetscMPIInt size,rank,taga,*len_s; 4519 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4520 PetscInt proc,m; 4521 PetscInt **buf_ri,**buf_rj; 4522 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4523 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4524 MPI_Request *s_waits,*r_waits; 4525 MPI_Status *status; 4526 MatScalar *aa=a->a; 4527 MatScalar **abuf_r,*ba_i; 4528 Mat_Merge_SeqsToMPI *merge; 4529 PetscContainer container; 4530 4531 PetscFunctionBegin; 4532 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4533 4534 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4535 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4536 4537 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4538 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4539 4540 bi = merge->bi; 4541 bj = merge->bj; 4542 buf_ri = merge->buf_ri; 4543 buf_rj = merge->buf_rj; 4544 4545 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4546 owners = merge->rowmap->range; 4547 len_s = merge->len_s; 4548 4549 /* send and recv matrix values */ 4550 /*-----------------------------*/ 4551 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4552 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4553 4554 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4555 for (proc=0,k=0; proc<size; proc++){ 4556 if (!len_s[proc]) continue; 4557 i = owners[proc]; 4558 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4559 k++; 4560 } 4561 4562 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4563 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4564 ierr = PetscFree(status);CHKERRQ(ierr); 4565 4566 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4567 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4568 4569 /* insert mat values of mpimat */ 4570 /*----------------------------*/ 4571 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4572 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4573 4574 for (k=0; k<merge->nrecv; k++){ 4575 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4576 nrows = *(buf_ri_k[k]); 4577 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4578 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4579 } 4580 4581 /* set values of ba */ 4582 m = merge->rowmap->n; 4583 for (i=0; i<m; i++) { 4584 arow = owners[rank] + i; 4585 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4586 bnzi = bi[i+1] - bi[i]; 4587 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4588 4589 /* add local non-zero vals of this proc's seqmat into ba */ 4590 anzi = ai[arow+1] - ai[arow]; 4591 aj = a->j + ai[arow]; 4592 aa = a->a + ai[arow]; 4593 nextaj = 0; 4594 for (j=0; nextaj<anzi; j++){ 4595 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4596 ba_i[j] += aa[nextaj++]; 4597 } 4598 } 4599 4600 /* add received vals into ba */ 4601 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4602 /* i-th row */ 4603 if (i == *nextrow[k]) { 4604 anzi = *(nextai[k]+1) - *nextai[k]; 4605 aj = buf_rj[k] + *(nextai[k]); 4606 aa = abuf_r[k] + *(nextai[k]); 4607 nextaj = 0; 4608 for (j=0; nextaj<anzi; j++){ 4609 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4610 ba_i[j] += aa[nextaj++]; 4611 } 4612 } 4613 nextrow[k]++; nextai[k]++; 4614 } 4615 } 4616 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4617 } 4618 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4619 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4620 4621 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4622 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4623 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4624 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4625 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4626 PetscFunctionReturn(0); 4627 } 4628 4629 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4630 4631 #undef __FUNCT__ 4632 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4633 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4634 { 4635 PetscErrorCode ierr; 4636 Mat B_mpi; 4637 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4638 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4639 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4640 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4641 PetscInt len,proc,*dnz,*onz,bs,cbs; 4642 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4643 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4644 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4645 MPI_Status *status; 4646 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4647 PetscBT lnkbt; 4648 Mat_Merge_SeqsToMPI *merge; 4649 PetscContainer container; 4650 4651 PetscFunctionBegin; 4652 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4653 4654 /* make sure it is a PETSc comm */ 4655 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4656 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4657 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4658 4659 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4660 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4661 4662 /* determine row ownership */ 4663 /*---------------------------------------------------------*/ 4664 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4665 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4666 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4667 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4668 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4669 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4670 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4671 4672 m = merge->rowmap->n; 4673 M = merge->rowmap->N; 4674 owners = merge->rowmap->range; 4675 4676 /* determine the number of messages to send, their lengths */ 4677 /*---------------------------------------------------------*/ 4678 len_s = merge->len_s; 4679 4680 len = 0; /* length of buf_si[] */ 4681 merge->nsend = 0; 4682 for (proc=0; proc<size; proc++){ 4683 len_si[proc] = 0; 4684 if (proc == rank){ 4685 len_s[proc] = 0; 4686 } else { 4687 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4688 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4689 } 4690 if (len_s[proc]) { 4691 merge->nsend++; 4692 nrows = 0; 4693 for (i=owners[proc]; i<owners[proc+1]; i++){ 4694 if (ai[i+1] > ai[i]) nrows++; 4695 } 4696 len_si[proc] = 2*(nrows+1); 4697 len += len_si[proc]; 4698 } 4699 } 4700 4701 /* determine the number and length of messages to receive for ij-structure */ 4702 /*-------------------------------------------------------------------------*/ 4703 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4704 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4705 4706 /* post the Irecv of j-structure */ 4707 /*-------------------------------*/ 4708 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4709 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4710 4711 /* post the Isend of j-structure */ 4712 /*--------------------------------*/ 4713 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4714 4715 for (proc=0, k=0; proc<size; proc++){ 4716 if (!len_s[proc]) continue; 4717 i = owners[proc]; 4718 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4719 k++; 4720 } 4721 4722 /* receives and sends of j-structure are complete */ 4723 /*------------------------------------------------*/ 4724 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4725 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4726 4727 /* send and recv i-structure */ 4728 /*---------------------------*/ 4729 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4730 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4731 4732 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4733 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4734 for (proc=0,k=0; proc<size; proc++){ 4735 if (!len_s[proc]) continue; 4736 /* form outgoing message for i-structure: 4737 buf_si[0]: nrows to be sent 4738 [1:nrows]: row index (global) 4739 [nrows+1:2*nrows+1]: i-structure index 4740 */ 4741 /*-------------------------------------------*/ 4742 nrows = len_si[proc]/2 - 1; 4743 buf_si_i = buf_si + nrows+1; 4744 buf_si[0] = nrows; 4745 buf_si_i[0] = 0; 4746 nrows = 0; 4747 for (i=owners[proc]; i<owners[proc+1]; i++){ 4748 anzi = ai[i+1] - ai[i]; 4749 if (anzi) { 4750 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4751 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4752 nrows++; 4753 } 4754 } 4755 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4756 k++; 4757 buf_si += len_si[proc]; 4758 } 4759 4760 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4761 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4762 4763 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4764 for (i=0; i<merge->nrecv; i++){ 4765 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4766 } 4767 4768 ierr = PetscFree(len_si);CHKERRQ(ierr); 4769 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4770 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4771 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4772 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4773 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4774 ierr = PetscFree(status);CHKERRQ(ierr); 4775 4776 /* compute a local seq matrix in each processor */ 4777 /*----------------------------------------------*/ 4778 /* allocate bi array and free space for accumulating nonzero column info */ 4779 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4780 bi[0] = 0; 4781 4782 /* create and initialize a linked list */ 4783 nlnk = N+1; 4784 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4785 4786 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4787 len = 0; 4788 len = ai[owners[rank+1]] - ai[owners[rank]]; 4789 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4790 current_space = free_space; 4791 4792 /* determine symbolic info for each local row */ 4793 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4794 4795 for (k=0; k<merge->nrecv; k++){ 4796 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4797 nrows = *buf_ri_k[k]; 4798 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4799 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4800 } 4801 4802 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4803 len = 0; 4804 for (i=0;i<m;i++) { 4805 bnzi = 0; 4806 /* add local non-zero cols of this proc's seqmat into lnk */ 4807 arow = owners[rank] + i; 4808 anzi = ai[arow+1] - ai[arow]; 4809 aj = a->j + ai[arow]; 4810 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4811 bnzi += nlnk; 4812 /* add received col data into lnk */ 4813 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4814 if (i == *nextrow[k]) { /* i-th row */ 4815 anzi = *(nextai[k]+1) - *nextai[k]; 4816 aj = buf_rj[k] + *nextai[k]; 4817 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4818 bnzi += nlnk; 4819 nextrow[k]++; nextai[k]++; 4820 } 4821 } 4822 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4823 4824 /* if free space is not available, make more free space */ 4825 if (current_space->local_remaining<bnzi) { 4826 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4827 nspacedouble++; 4828 } 4829 /* copy data into free space, then initialize lnk */ 4830 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4831 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4832 4833 current_space->array += bnzi; 4834 current_space->local_used += bnzi; 4835 current_space->local_remaining -= bnzi; 4836 4837 bi[i+1] = bi[i] + bnzi; 4838 } 4839 4840 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4841 4842 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4843 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4844 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4845 4846 /* create symbolic parallel matrix B_mpi */ 4847 /*---------------------------------------*/ 4848 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4849 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4850 if (n==PETSC_DECIDE) { 4851 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4852 } else { 4853 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4854 } 4855 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr); 4856 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4857 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4858 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4859 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4860 4861 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4862 B_mpi->assembled = PETSC_FALSE; 4863 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4864 merge->bi = bi; 4865 merge->bj = bj; 4866 merge->buf_ri = buf_ri; 4867 merge->buf_rj = buf_rj; 4868 merge->coi = PETSC_NULL; 4869 merge->coj = PETSC_NULL; 4870 merge->owners_co = PETSC_NULL; 4871 4872 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4873 4874 /* attach the supporting struct to B_mpi for reuse */ 4875 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4876 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4877 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4878 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4879 *mpimat = B_mpi; 4880 4881 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4882 PetscFunctionReturn(0); 4883 } 4884 4885 #undef __FUNCT__ 4886 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4887 /*@C 4888 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4889 matrices from each processor 4890 4891 Collective on MPI_Comm 4892 4893 Input Parameters: 4894 + comm - the communicators the parallel matrix will live on 4895 . seqmat - the input sequential matrices 4896 . m - number of local rows (or PETSC_DECIDE) 4897 . n - number of local columns (or PETSC_DECIDE) 4898 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4899 4900 Output Parameter: 4901 . mpimat - the parallel matrix generated 4902 4903 Level: advanced 4904 4905 Notes: 4906 The dimensions of the sequential matrix in each processor MUST be the same. 4907 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4908 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4909 @*/ 4910 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4911 { 4912 PetscErrorCode ierr; 4913 PetscMPIInt size; 4914 4915 PetscFunctionBegin; 4916 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4917 if (size == 1){ 4918 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4919 if (scall == MAT_INITIAL_MATRIX){ 4920 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4921 } else { 4922 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4923 } 4924 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4925 PetscFunctionReturn(0); 4926 } 4927 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4928 if (scall == MAT_INITIAL_MATRIX){ 4929 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4930 } 4931 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4932 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4933 PetscFunctionReturn(0); 4934 } 4935 4936 #undef __FUNCT__ 4937 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4938 /*@ 4939 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4940 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4941 with MatGetSize() 4942 4943 Not Collective 4944 4945 Input Parameters: 4946 + A - the matrix 4947 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4948 4949 Output Parameter: 4950 . A_loc - the local sequential matrix generated 4951 4952 Level: developer 4953 4954 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4955 4956 @*/ 4957 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4958 { 4959 PetscErrorCode ierr; 4960 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4961 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4962 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4963 MatScalar *aa=a->a,*ba=b->a,*cam; 4964 PetscScalar *ca; 4965 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4966 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4967 PetscBool match; 4968 4969 PetscFunctionBegin; 4970 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4971 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4972 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4973 if (scall == MAT_INITIAL_MATRIX){ 4974 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4975 ci[0] = 0; 4976 for (i=0; i<am; i++){ 4977 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4978 } 4979 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4980 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4981 k = 0; 4982 for (i=0; i<am; i++) { 4983 ncols_o = bi[i+1] - bi[i]; 4984 ncols_d = ai[i+1] - ai[i]; 4985 /* off-diagonal portion of A */ 4986 for (jo=0; jo<ncols_o; jo++) { 4987 col = cmap[*bj]; 4988 if (col >= cstart) break; 4989 cj[k] = col; bj++; 4990 ca[k++] = *ba++; 4991 } 4992 /* diagonal portion of A */ 4993 for (j=0; j<ncols_d; j++) { 4994 cj[k] = cstart + *aj++; 4995 ca[k++] = *aa++; 4996 } 4997 /* off-diagonal portion of A */ 4998 for (j=jo; j<ncols_o; j++) { 4999 cj[k] = cmap[*bj++]; 5000 ca[k++] = *ba++; 5001 } 5002 } 5003 /* put together the new matrix */ 5004 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5005 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5006 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5007 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5008 mat->free_a = PETSC_TRUE; 5009 mat->free_ij = PETSC_TRUE; 5010 mat->nonew = 0; 5011 } else if (scall == MAT_REUSE_MATRIX){ 5012 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5013 ci = mat->i; cj = mat->j; cam = mat->a; 5014 for (i=0; i<am; i++) { 5015 /* off-diagonal portion of A */ 5016 ncols_o = bi[i+1] - bi[i]; 5017 for (jo=0; jo<ncols_o; jo++) { 5018 col = cmap[*bj]; 5019 if (col >= cstart) break; 5020 *cam++ = *ba++; bj++; 5021 } 5022 /* diagonal portion of A */ 5023 ncols_d = ai[i+1] - ai[i]; 5024 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5025 /* off-diagonal portion of A */ 5026 for (j=jo; j<ncols_o; j++) { 5027 *cam++ = *ba++; bj++; 5028 } 5029 } 5030 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5031 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5032 PetscFunctionReturn(0); 5033 } 5034 5035 #undef __FUNCT__ 5036 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5037 /*@C 5038 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5039 5040 Not Collective 5041 5042 Input Parameters: 5043 + A - the matrix 5044 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5045 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5046 5047 Output Parameter: 5048 . A_loc - the local sequential matrix generated 5049 5050 Level: developer 5051 5052 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5053 5054 @*/ 5055 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5056 { 5057 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5058 PetscErrorCode ierr; 5059 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5060 IS isrowa,iscola; 5061 Mat *aloc; 5062 PetscBool match; 5063 5064 PetscFunctionBegin; 5065 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5066 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5067 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5068 if (!row){ 5069 start = A->rmap->rstart; end = A->rmap->rend; 5070 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5071 } else { 5072 isrowa = *row; 5073 } 5074 if (!col){ 5075 start = A->cmap->rstart; 5076 cmap = a->garray; 5077 nzA = a->A->cmap->n; 5078 nzB = a->B->cmap->n; 5079 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5080 ncols = 0; 5081 for (i=0; i<nzB; i++) { 5082 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5083 else break; 5084 } 5085 imark = i; 5086 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5087 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5088 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5089 } else { 5090 iscola = *col; 5091 } 5092 if (scall != MAT_INITIAL_MATRIX){ 5093 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5094 aloc[0] = *A_loc; 5095 } 5096 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5097 *A_loc = aloc[0]; 5098 ierr = PetscFree(aloc);CHKERRQ(ierr); 5099 if (!row){ 5100 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5101 } 5102 if (!col){ 5103 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5104 } 5105 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5106 PetscFunctionReturn(0); 5107 } 5108 5109 #undef __FUNCT__ 5110 #define __FUNCT__ "MatGetBrowsOfAcols" 5111 /*@C 5112 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5113 5114 Collective on Mat 5115 5116 Input Parameters: 5117 + A,B - the matrices in mpiaij format 5118 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5119 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5120 5121 Output Parameter: 5122 + rowb, colb - index sets of rows and columns of B to extract 5123 - B_seq - the sequential matrix generated 5124 5125 Level: developer 5126 5127 @*/ 5128 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5129 { 5130 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5131 PetscErrorCode ierr; 5132 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5133 IS isrowb,iscolb; 5134 Mat *bseq=PETSC_NULL; 5135 5136 PetscFunctionBegin; 5137 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5138 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5139 } 5140 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5141 5142 if (scall == MAT_INITIAL_MATRIX){ 5143 start = A->cmap->rstart; 5144 cmap = a->garray; 5145 nzA = a->A->cmap->n; 5146 nzB = a->B->cmap->n; 5147 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5148 ncols = 0; 5149 for (i=0; i<nzB; i++) { /* row < local row index */ 5150 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5151 else break; 5152 } 5153 imark = i; 5154 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5155 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5156 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5157 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5158 } else { 5159 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5160 isrowb = *rowb; iscolb = *colb; 5161 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5162 bseq[0] = *B_seq; 5163 } 5164 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5165 *B_seq = bseq[0]; 5166 ierr = PetscFree(bseq);CHKERRQ(ierr); 5167 if (!rowb){ 5168 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5169 } else { 5170 *rowb = isrowb; 5171 } 5172 if (!colb){ 5173 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5174 } else { 5175 *colb = iscolb; 5176 } 5177 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5178 PetscFunctionReturn(0); 5179 } 5180 5181 #undef __FUNCT__ 5182 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5183 /* 5184 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5185 of the OFF-DIAGONAL portion of local A 5186 5187 Collective on Mat 5188 5189 Input Parameters: 5190 + A,B - the matrices in mpiaij format 5191 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5192 5193 Output Parameter: 5194 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5195 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5196 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5197 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5198 5199 Level: developer 5200 5201 */ 5202 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5203 { 5204 VecScatter_MPI_General *gen_to,*gen_from; 5205 PetscErrorCode ierr; 5206 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5207 Mat_SeqAIJ *b_oth; 5208 VecScatter ctx=a->Mvctx; 5209 MPI_Comm comm=((PetscObject)ctx)->comm; 5210 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5211 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5212 PetscScalar *rvalues,*svalues; 5213 MatScalar *b_otha,*bufa,*bufA; 5214 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5215 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5216 MPI_Status *sstatus,rstatus; 5217 PetscMPIInt jj; 5218 PetscInt *cols,sbs,rbs; 5219 PetscScalar *vals; 5220 5221 PetscFunctionBegin; 5222 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5223 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5224 } 5225 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5226 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5227 5228 gen_to = (VecScatter_MPI_General*)ctx->todata; 5229 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5230 rvalues = gen_from->values; /* holds the length of receiving row */ 5231 svalues = gen_to->values; /* holds the length of sending row */ 5232 nrecvs = gen_from->n; 5233 nsends = gen_to->n; 5234 5235 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5236 srow = gen_to->indices; /* local row index to be sent */ 5237 sstarts = gen_to->starts; 5238 sprocs = gen_to->procs; 5239 sstatus = gen_to->sstatus; 5240 sbs = gen_to->bs; 5241 rstarts = gen_from->starts; 5242 rprocs = gen_from->procs; 5243 rbs = gen_from->bs; 5244 5245 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5246 if (scall == MAT_INITIAL_MATRIX){ 5247 /* i-array */ 5248 /*---------*/ 5249 /* post receives */ 5250 for (i=0; i<nrecvs; i++){ 5251 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5252 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5253 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5254 } 5255 5256 /* pack the outgoing message */ 5257 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5258 sstartsj[0] = 0; rstartsj[0] = 0; 5259 len = 0; /* total length of j or a array to be sent */ 5260 k = 0; 5261 for (i=0; i<nsends; i++){ 5262 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5263 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5264 for (j=0; j<nrows; j++) { 5265 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5266 for (l=0; l<sbs; l++){ 5267 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5268 rowlen[j*sbs+l] = ncols; 5269 len += ncols; 5270 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5271 } 5272 k++; 5273 } 5274 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5275 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5276 } 5277 /* recvs and sends of i-array are completed */ 5278 i = nrecvs; 5279 while (i--) { 5280 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5281 } 5282 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5283 5284 /* allocate buffers for sending j and a arrays */ 5285 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5286 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5287 5288 /* create i-array of B_oth */ 5289 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5290 b_othi[0] = 0; 5291 len = 0; /* total length of j or a array to be received */ 5292 k = 0; 5293 for (i=0; i<nrecvs; i++){ 5294 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5295 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5296 for (j=0; j<nrows; j++) { 5297 b_othi[k+1] = b_othi[k] + rowlen[j]; 5298 len += rowlen[j]; k++; 5299 } 5300 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5301 } 5302 5303 /* allocate space for j and a arrrays of B_oth */ 5304 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5305 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5306 5307 /* j-array */ 5308 /*---------*/ 5309 /* post receives of j-array */ 5310 for (i=0; i<nrecvs; i++){ 5311 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5312 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5313 } 5314 5315 /* pack the outgoing message j-array */ 5316 k = 0; 5317 for (i=0; i<nsends; i++){ 5318 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5319 bufJ = bufj+sstartsj[i]; 5320 for (j=0; j<nrows; j++) { 5321 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5322 for (ll=0; ll<sbs; ll++){ 5323 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5324 for (l=0; l<ncols; l++){ 5325 *bufJ++ = cols[l]; 5326 } 5327 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5328 } 5329 } 5330 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5331 } 5332 5333 /* recvs and sends of j-array are completed */ 5334 i = nrecvs; 5335 while (i--) { 5336 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5337 } 5338 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5339 } else if (scall == MAT_REUSE_MATRIX){ 5340 sstartsj = *startsj_s; 5341 rstartsj = *startsj_r; 5342 bufa = *bufa_ptr; 5343 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5344 b_otha = b_oth->a; 5345 } else { 5346 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5347 } 5348 5349 /* a-array */ 5350 /*---------*/ 5351 /* post receives of a-array */ 5352 for (i=0; i<nrecvs; i++){ 5353 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5354 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5355 } 5356 5357 /* pack the outgoing message a-array */ 5358 k = 0; 5359 for (i=0; i<nsends; i++){ 5360 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5361 bufA = bufa+sstartsj[i]; 5362 for (j=0; j<nrows; j++) { 5363 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5364 for (ll=0; ll<sbs; ll++){ 5365 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5366 for (l=0; l<ncols; l++){ 5367 *bufA++ = vals[l]; 5368 } 5369 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5370 } 5371 } 5372 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5373 } 5374 /* recvs and sends of a-array are completed */ 5375 i = nrecvs; 5376 while (i--) { 5377 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5378 } 5379 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5380 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5381 5382 if (scall == MAT_INITIAL_MATRIX){ 5383 /* put together the new matrix */ 5384 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5385 5386 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5387 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5388 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5389 b_oth->free_a = PETSC_TRUE; 5390 b_oth->free_ij = PETSC_TRUE; 5391 b_oth->nonew = 0; 5392 5393 ierr = PetscFree(bufj);CHKERRQ(ierr); 5394 if (!startsj_s || !bufa_ptr){ 5395 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5396 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5397 } else { 5398 *startsj_s = sstartsj; 5399 *startsj_r = rstartsj; 5400 *bufa_ptr = bufa; 5401 } 5402 } 5403 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5404 PetscFunctionReturn(0); 5405 } 5406 5407 #undef __FUNCT__ 5408 #define __FUNCT__ "MatGetCommunicationStructs" 5409 /*@C 5410 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5411 5412 Not Collective 5413 5414 Input Parameters: 5415 . A - The matrix in mpiaij format 5416 5417 Output Parameter: 5418 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5419 . colmap - A map from global column index to local index into lvec 5420 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5421 5422 Level: developer 5423 5424 @*/ 5425 #if defined (PETSC_USE_CTABLE) 5426 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5427 #else 5428 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5429 #endif 5430 { 5431 Mat_MPIAIJ *a; 5432 5433 PetscFunctionBegin; 5434 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5435 PetscValidPointer(lvec, 2); 5436 PetscValidPointer(colmap, 3); 5437 PetscValidPointer(multScatter, 4); 5438 a = (Mat_MPIAIJ *) A->data; 5439 if (lvec) *lvec = a->lvec; 5440 if (colmap) *colmap = a->colmap; 5441 if (multScatter) *multScatter = a->Mvctx; 5442 PetscFunctionReturn(0); 5443 } 5444 5445 EXTERN_C_BEGIN 5446 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5447 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5448 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5449 EXTERN_C_END 5450 5451 #undef __FUNCT__ 5452 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5453 /* 5454 Computes (B'*A')' since computing B*A directly is untenable 5455 5456 n p p 5457 ( ) ( ) ( ) 5458 m ( A ) * n ( B ) = m ( C ) 5459 ( ) ( ) ( ) 5460 5461 */ 5462 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5463 { 5464 PetscErrorCode ierr; 5465 Mat At,Bt,Ct; 5466 5467 PetscFunctionBegin; 5468 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5469 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5470 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5471 ierr = MatDestroy(&At);CHKERRQ(ierr); 5472 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5473 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5474 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5475 PetscFunctionReturn(0); 5476 } 5477 5478 #undef __FUNCT__ 5479 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5480 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5481 { 5482 PetscErrorCode ierr; 5483 PetscInt m=A->rmap->n,n=B->cmap->n; 5484 Mat Cmat; 5485 5486 PetscFunctionBegin; 5487 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5488 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5489 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5490 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5491 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5492 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5493 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5494 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5495 *C = Cmat; 5496 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5497 PetscFunctionReturn(0); 5498 } 5499 5500 /* ----------------------------------------------------------------*/ 5501 #undef __FUNCT__ 5502 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5503 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5504 { 5505 PetscErrorCode ierr; 5506 5507 PetscFunctionBegin; 5508 if (scall == MAT_INITIAL_MATRIX){ 5509 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5510 } 5511 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5512 PetscFunctionReturn(0); 5513 } 5514 5515 EXTERN_C_BEGIN 5516 #if defined(PETSC_HAVE_MUMPS) 5517 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5518 #endif 5519 #if defined(PETSC_HAVE_PASTIX) 5520 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5521 #endif 5522 #if defined(PETSC_HAVE_SUPERLU_DIST) 5523 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5524 #endif 5525 #if defined(PETSC_HAVE_SPOOLES) 5526 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5527 #endif 5528 EXTERN_C_END 5529 5530 /*MC 5531 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5532 5533 Options Database Keys: 5534 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5535 5536 Level: beginner 5537 5538 .seealso: MatCreateAIJ() 5539 M*/ 5540 5541 EXTERN_C_BEGIN 5542 #undef __FUNCT__ 5543 #define __FUNCT__ "MatCreate_MPIAIJ" 5544 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5545 { 5546 Mat_MPIAIJ *b; 5547 PetscErrorCode ierr; 5548 PetscMPIInt size; 5549 5550 PetscFunctionBegin; 5551 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5552 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5553 B->data = (void*)b; 5554 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5555 B->assembled = PETSC_FALSE; 5556 B->insertmode = NOT_SET_VALUES; 5557 b->size = size; 5558 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5559 5560 /* build cache for off array entries formed */ 5561 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5562 b->donotstash = PETSC_FALSE; 5563 b->colmap = 0; 5564 b->garray = 0; 5565 b->roworiented = PETSC_TRUE; 5566 5567 /* stuff used for matrix vector multiply */ 5568 b->lvec = PETSC_NULL; 5569 b->Mvctx = PETSC_NULL; 5570 5571 /* stuff for MatGetRow() */ 5572 b->rowindices = 0; 5573 b->rowvalues = 0; 5574 b->getrowactive = PETSC_FALSE; 5575 5576 /* flexible pointer used in CUSP/CUSPARSE classes */ 5577 b->spptr = PETSC_NULL; 5578 #if defined(PETSC_HAVE_SPOOLES) 5579 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5580 "MatGetFactor_mpiaij_spooles", 5581 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5582 #endif 5583 #if defined(PETSC_HAVE_MUMPS) 5584 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5585 "MatGetFactor_aij_mumps", 5586 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5587 #endif 5588 #if defined(PETSC_HAVE_PASTIX) 5589 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5590 "MatGetFactor_mpiaij_pastix", 5591 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5592 #endif 5593 #if defined(PETSC_HAVE_SUPERLU_DIST) 5594 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5595 "MatGetFactor_mpiaij_superlu_dist", 5596 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5597 #endif 5598 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5599 "MatStoreValues_MPIAIJ", 5600 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5601 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5602 "MatRetrieveValues_MPIAIJ", 5603 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5604 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5605 "MatGetDiagonalBlock_MPIAIJ", 5606 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5607 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5608 "MatIsTranspose_MPIAIJ", 5609 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5610 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5611 "MatMPIAIJSetPreallocation_MPIAIJ", 5612 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5613 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5614 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5615 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5616 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5617 "MatDiagonalScaleLocal_MPIAIJ", 5618 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5619 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5620 "MatConvert_MPIAIJ_MPIAIJPERM", 5621 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5622 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5623 "MatConvert_MPIAIJ_MPIAIJCRL", 5624 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5626 "MatConvert_MPIAIJ_MPISBAIJ", 5627 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5628 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5629 "MatMatMult_MPIDense_MPIAIJ", 5630 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5632 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5633 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5634 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5635 "MatMatMultNumeric_MPIDense_MPIAIJ", 5636 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5637 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5638 PetscFunctionReturn(0); 5639 } 5640 EXTERN_C_END 5641 5642 #undef __FUNCT__ 5643 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5644 /*@ 5645 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5646 and "off-diagonal" part of the matrix in CSR format. 5647 5648 Collective on MPI_Comm 5649 5650 Input Parameters: 5651 + comm - MPI communicator 5652 . m - number of local rows (Cannot be PETSC_DECIDE) 5653 . n - This value should be the same as the local size used in creating the 5654 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5655 calculated if N is given) For square matrices n is almost always m. 5656 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5657 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5658 . i - row indices for "diagonal" portion of matrix 5659 . j - column indices 5660 . a - matrix values 5661 . oi - row indices for "off-diagonal" portion of matrix 5662 . oj - column indices 5663 - oa - matrix values 5664 5665 Output Parameter: 5666 . mat - the matrix 5667 5668 Level: advanced 5669 5670 Notes: 5671 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5672 must free the arrays once the matrix has been destroyed and not before. 5673 5674 The i and j indices are 0 based 5675 5676 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5677 5678 This sets local rows and cannot be used to set off-processor values. 5679 5680 You cannot later use MatSetValues() to change values in this matrix. 5681 5682 .keywords: matrix, aij, compressed row, sparse, parallel 5683 5684 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5685 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5686 @*/ 5687 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5688 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5689 { 5690 PetscErrorCode ierr; 5691 Mat_MPIAIJ *maij; 5692 5693 PetscFunctionBegin; 5694 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5695 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5696 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5697 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5698 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5699 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5700 maij = (Mat_MPIAIJ*) (*mat)->data; 5701 maij->donotstash = PETSC_TRUE; 5702 (*mat)->preallocated = PETSC_TRUE; 5703 5704 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5705 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5706 5707 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5708 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5709 5710 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5711 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5712 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5713 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5714 5715 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5716 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5717 PetscFunctionReturn(0); 5718 } 5719 5720 /* 5721 Special version for direct calls from Fortran 5722 */ 5723 #include <petsc-private/fortranimpl.h> 5724 5725 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5726 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5727 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5728 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5729 #endif 5730 5731 /* Change these macros so can be used in void function */ 5732 #undef CHKERRQ 5733 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5734 #undef SETERRQ2 5735 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5736 #undef SETERRQ3 5737 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5738 #undef SETERRQ 5739 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5740 5741 EXTERN_C_BEGIN 5742 #undef __FUNCT__ 5743 #define __FUNCT__ "matsetvaluesmpiaij_" 5744 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5745 { 5746 Mat mat = *mmat; 5747 PetscInt m = *mm, n = *mn; 5748 InsertMode addv = *maddv; 5749 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5750 PetscScalar value; 5751 PetscErrorCode ierr; 5752 5753 MatCheckPreallocated(mat,1); 5754 if (mat->insertmode == NOT_SET_VALUES) { 5755 mat->insertmode = addv; 5756 } 5757 #if defined(PETSC_USE_DEBUG) 5758 else if (mat->insertmode != addv) { 5759 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5760 } 5761 #endif 5762 { 5763 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5764 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5765 PetscBool roworiented = aij->roworiented; 5766 5767 /* Some Variables required in the macro */ 5768 Mat A = aij->A; 5769 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5770 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5771 MatScalar *aa = a->a; 5772 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5773 Mat B = aij->B; 5774 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5775 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5776 MatScalar *ba = b->a; 5777 5778 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5779 PetscInt nonew = a->nonew; 5780 MatScalar *ap1,*ap2; 5781 5782 PetscFunctionBegin; 5783 for (i=0; i<m; i++) { 5784 if (im[i] < 0) continue; 5785 #if defined(PETSC_USE_DEBUG) 5786 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5787 #endif 5788 if (im[i] >= rstart && im[i] < rend) { 5789 row = im[i] - rstart; 5790 lastcol1 = -1; 5791 rp1 = aj + ai[row]; 5792 ap1 = aa + ai[row]; 5793 rmax1 = aimax[row]; 5794 nrow1 = ailen[row]; 5795 low1 = 0; 5796 high1 = nrow1; 5797 lastcol2 = -1; 5798 rp2 = bj + bi[row]; 5799 ap2 = ba + bi[row]; 5800 rmax2 = bimax[row]; 5801 nrow2 = bilen[row]; 5802 low2 = 0; 5803 high2 = nrow2; 5804 5805 for (j=0; j<n; j++) { 5806 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5807 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5808 if (in[j] >= cstart && in[j] < cend){ 5809 col = in[j] - cstart; 5810 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5811 } else if (in[j] < 0) continue; 5812 #if defined(PETSC_USE_DEBUG) 5813 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5814 #endif 5815 else { 5816 if (mat->was_assembled) { 5817 if (!aij->colmap) { 5818 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5819 } 5820 #if defined (PETSC_USE_CTABLE) 5821 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5822 col--; 5823 #else 5824 col = aij->colmap[in[j]] - 1; 5825 #endif 5826 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5827 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5828 col = in[j]; 5829 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5830 B = aij->B; 5831 b = (Mat_SeqAIJ*)B->data; 5832 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5833 rp2 = bj + bi[row]; 5834 ap2 = ba + bi[row]; 5835 rmax2 = bimax[row]; 5836 nrow2 = bilen[row]; 5837 low2 = 0; 5838 high2 = nrow2; 5839 bm = aij->B->rmap->n; 5840 ba = b->a; 5841 } 5842 } else col = in[j]; 5843 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5844 } 5845 } 5846 } else { 5847 if (!aij->donotstash) { 5848 if (roworiented) { 5849 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5850 } else { 5851 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5852 } 5853 } 5854 } 5855 }} 5856 PetscFunctionReturnVoid(); 5857 } 5858 EXTERN_C_END 5859 5860