1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.52 1996/06/25 16:03:11 balay Exp curfman $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "src/vec/vecimpl.h" 12 #include "src/inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = 0; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr); 45 ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 46 PetscFree(ia); PetscFree(ja); 47 } else { 48 if (ishift == oshift) { 49 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 50 } 51 else if (ishift == -1) { 52 /* temporarily subtract 1 from i and j indices */ 53 int nz = a->i[n] - 1; 54 for ( i=0; i<nz; i++ ) a->j[i]--; 55 for ( i=0; i<n+1; i++ ) a->i[i]--; 56 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 57 for ( i=0; i<nz; i++ ) a->j[i]++; 58 for ( i=0; i<n+1; i++ ) a->i[i]++; 59 } else { 60 /* temporarily add 1 to i and j indices */ 61 int nz = a->i[n] - 1; 62 for ( i=0; i<nz; i++ ) a->j[i]++; 63 for ( i=0; i<n+1; i++ ) a->i[i]++; 64 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 65 for ( i=0; i<nz; i++ ) a->j[i]--; 66 for ( i=0; i<n+1; i++ ) a->i[i]--; 67 } 68 } 69 return 0; 70 } 71 72 /* 73 Adds diagonal pointers to sparse matrix structure. 74 */ 75 76 int MatMarkDiag_SeqBAIJ(Mat A) 77 { 78 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 79 int i,j, *diag, m = a->mbs; 80 81 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 82 PLogObjectMemory(A,(m+1)*sizeof(int)); 83 for ( i=0; i<m; i++ ) { 84 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 85 if (a->j[j] == i) { 86 diag[i] = j; 87 break; 88 } 89 } 90 } 91 a->diag = diag; 92 return 0; 93 } 94 95 #include "draw.h" 96 #include "pinclude/pviewer.h" 97 #include "sys.h" 98 99 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 100 { 101 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 102 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 103 Scalar *aa; 104 105 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 106 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 107 col_lens[0] = MAT_COOKIE; 108 col_lens[1] = a->m; 109 col_lens[2] = a->n; 110 col_lens[3] = a->nz*bs2; 111 112 /* store lengths of each row and write (including header) to file */ 113 count = 0; 114 for ( i=0; i<a->mbs; i++ ) { 115 for ( j=0; j<bs; j++ ) { 116 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 117 } 118 } 119 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 120 PetscFree(col_lens); 121 122 /* store column indices (zero start index) */ 123 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 124 count = 0; 125 for ( i=0; i<a->mbs; i++ ) { 126 for ( j=0; j<bs; j++ ) { 127 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 128 for ( l=0; l<bs; l++ ) { 129 jj[count++] = bs*a->j[k] + l; 130 } 131 } 132 } 133 } 134 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 135 PetscFree(jj); 136 137 /* store nonzero values */ 138 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 139 count = 0; 140 for ( i=0; i<a->mbs; i++ ) { 141 for ( j=0; j<bs; j++ ) { 142 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 143 for ( l=0; l<bs; l++ ) { 144 aa[count++] = a->a[bs2*k + l*bs + j]; 145 } 146 } 147 } 148 } 149 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 150 PetscFree(aa); 151 return 0; 152 } 153 154 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 155 { 156 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 157 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 158 FILE *fd; 159 char *outputname; 160 161 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 162 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 163 ierr = ViewerGetFormat(viewer,&format); 164 if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 165 fprintf(fd," block size is %d\n",bs); 166 } 167 else if (format == ASCII_FORMAT_MATLAB) { 168 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 169 } 170 else if (format == ASCII_FORMAT_COMMON) { 171 for ( i=0; i<a->mbs; i++ ) { 172 for ( j=0; j<bs; j++ ) { 173 fprintf(fd,"row %d:",i*bs+j); 174 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 175 for ( l=0; l<bs; l++ ) { 176 #if defined(PETSC_COMPLEX) 177 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 178 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 179 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 180 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 181 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 182 #else 183 if (a->a[bs2*k + l*bs + j] != 0.0) 184 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 185 #endif 186 } 187 } 188 fprintf(fd,"\n"); 189 } 190 } 191 } 192 else { 193 for ( i=0; i<a->mbs; i++ ) { 194 for ( j=0; j<bs; j++ ) { 195 fprintf(fd,"row %d:",i*bs+j); 196 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 197 for ( l=0; l<bs; l++ ) { 198 #if defined(PETSC_COMPLEX) 199 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 200 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 201 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 202 } 203 else { 204 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 205 } 206 #else 207 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 208 #endif 209 } 210 } 211 fprintf(fd,"\n"); 212 } 213 } 214 } 215 fflush(fd); 216 return 0; 217 } 218 219 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 220 { 221 Mat A = (Mat) obj; 222 ViewerType vtype; 223 int ierr; 224 225 if (!viewer) { 226 viewer = STDOUT_VIEWER_SELF; 227 } 228 229 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 230 if (vtype == MATLAB_VIEWER) { 231 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 232 } 233 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 234 return MatView_SeqBAIJ_ASCII(A,viewer); 235 } 236 else if (vtype == BINARY_FILE_VIEWER) { 237 return MatView_SeqBAIJ_Binary(A,viewer); 238 } 239 else if (vtype == DRAW_VIEWER) { 240 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 241 } 242 return 0; 243 } 244 245 #define CHUNKSIZE 10 246 247 /* This version has row oriented v */ 248 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 249 { 250 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 251 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 252 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 253 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 254 int ridx,cidx,bs2=a->bs2; 255 Scalar *ap,value,*aa=a->a,*bap; 256 257 for ( k=0; k<m; k++ ) { /* loop over added rows */ 258 row = im[k]; brow = row/bs; 259 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 260 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 261 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 262 rmax = imax[brow]; nrow = ailen[brow]; 263 low = 0; 264 for ( l=0; l<n; l++ ) { /* loop over added columns */ 265 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 266 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 267 col = in[l]; bcol = col/bs; 268 ridx = row % bs; cidx = col % bs; 269 if (roworiented) { 270 value = *v++; 271 } 272 else { 273 value = v[k + l*m]; 274 } 275 if (!sorted) low = 0; high = nrow; 276 while (high-low > 5) { 277 t = (low+high)/2; 278 if (rp[t] > bcol) high = t; 279 else low = t; 280 } 281 for ( i=low; i<high; i++ ) { 282 if (rp[i] > bcol) break; 283 if (rp[i] == bcol) { 284 bap = ap + bs2*i + bs*cidx + ridx; 285 if (is == ADD_VALUES) *bap += value; 286 else *bap = value; 287 goto noinsert; 288 } 289 } 290 if (nonew) goto noinsert; 291 if (nrow >= rmax) { 292 /* there is no extra room in row, therefore enlarge */ 293 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 294 Scalar *new_a; 295 296 /* malloc new storage space */ 297 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 298 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 299 new_j = (int *) (new_a + bs2*new_nz); 300 new_i = new_j + new_nz; 301 302 /* copy over old data into new slots */ 303 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 304 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 305 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 306 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 307 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 308 len*sizeof(int)); 309 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 310 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 311 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 312 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 313 /* free up old matrix storage */ 314 PetscFree(a->a); 315 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 316 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 317 a->singlemalloc = 1; 318 319 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 320 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 321 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 322 a->maxnz += bs2*CHUNKSIZE; 323 a->reallocs++; 324 a->nz++; 325 } 326 N = nrow++ - 1; 327 /* shift up all the later entries in this row */ 328 for ( ii=N; ii>=i; ii-- ) { 329 rp[ii+1] = rp[ii]; 330 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 331 } 332 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 333 rp[i] = bcol; 334 ap[bs2*i + bs*cidx + ridx] = value; 335 noinsert:; 336 low = i; 337 } 338 ailen[brow] = nrow; 339 } 340 return 0; 341 } 342 343 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 344 { 345 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 346 *m = a->m; *n = a->n; 347 return 0; 348 } 349 350 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 351 { 352 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 353 *m = 0; *n = a->m; 354 return 0; 355 } 356 357 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 358 { 359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 360 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 361 Scalar *aa,*v_i,*aa_i; 362 363 bs = a->bs; 364 ai = a->i; 365 aj = a->j; 366 aa = a->a; 367 bs2 = a->bs2; 368 369 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 370 371 bn = row/bs; /* Block number */ 372 bp = row % bs; /* Block Position */ 373 M = ai[bn+1] - ai[bn]; 374 *nz = bs*M; 375 376 if (v) { 377 *v = 0; 378 if (*nz) { 379 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 380 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 381 v_i = *v + i*bs; 382 aa_i = aa + bs2*(ai[bn] + i); 383 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 384 } 385 } 386 } 387 388 if (idx) { 389 *idx = 0; 390 if (*nz) { 391 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 392 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 393 idx_i = *idx + i*bs; 394 itmp = bs*aj[ai[bn] + i]; 395 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 396 } 397 } 398 } 399 return 0; 400 } 401 402 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 403 { 404 if (idx) {if (*idx) PetscFree(*idx);} 405 if (v) {if (*v) PetscFree(*v);} 406 return 0; 407 } 408 409 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 410 { 411 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 412 Mat C; 413 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 414 int *rows,*cols,bs2=a->bs2; 415 Scalar *array=a->a; 416 417 if (B==PETSC_NULL && mbs!=nbs) 418 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 419 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 420 PetscMemzero(col,(1+nbs)*sizeof(int)); 421 422 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 423 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 424 PetscFree(col); 425 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 426 cols = rows + bs; 427 for ( i=0; i<mbs; i++ ) { 428 cols[0] = i*bs; 429 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 430 len = ai[i+1] - ai[i]; 431 for ( j=0; j<len; j++ ) { 432 rows[0] = (*aj++)*bs; 433 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 434 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 435 array += bs2; 436 } 437 } 438 PetscFree(rows); 439 440 ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 441 ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 442 443 if (B != PETSC_NULL) { 444 *B = C; 445 } else { 446 /* This isn't really an in-place transpose */ 447 PetscFree(a->a); 448 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 449 if (a->diag) PetscFree(a->diag); 450 if (a->ilen) PetscFree(a->ilen); 451 if (a->imax) PetscFree(a->imax); 452 if (a->solve_work) PetscFree(a->solve_work); 453 PetscFree(a); 454 PetscMemcpy(A,C,sizeof(struct _Mat)); 455 PetscHeaderDestroy(C); 456 } 457 return 0; 458 } 459 460 461 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 462 { 463 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 464 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 465 int m = a->m,*ip, N, *ailen = a->ilen; 466 int mbs = a->mbs, bs2 = a->bs2; 467 Scalar *aa = a->a, *ap; 468 469 if (mode == FLUSH_ASSEMBLY) return 0; 470 471 for ( i=1; i<mbs; i++ ) { 472 /* move each row back by the amount of empty slots (fshift) before it*/ 473 fshift += imax[i-1] - ailen[i-1]; 474 if (fshift) { 475 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 476 N = ailen[i]; 477 for ( j=0; j<N; j++ ) { 478 ip[j-fshift] = ip[j]; 479 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 480 } 481 } 482 ai[i] = ai[i-1] + ailen[i-1]; 483 } 484 if (mbs) { 485 fshift += imax[mbs-1] - ailen[mbs-1]; 486 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 487 } 488 /* reset ilen and imax for each row */ 489 for ( i=0; i<mbs; i++ ) { 490 ailen[i] = imax[i] = ai[i+1] - ai[i]; 491 } 492 a->nz = ai[mbs]; 493 494 /* diagonals may have moved, so kill the diagonal pointers */ 495 if (fshift && a->diag) { 496 PetscFree(a->diag); 497 PLogObjectMemory(A,-(m+1)*sizeof(int)); 498 a->diag = 0; 499 } 500 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space (blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs); 501 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n", 502 a->reallocs); 503 return 0; 504 } 505 506 static int MatZeroEntries_SeqBAIJ(Mat A) 507 { 508 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 509 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 510 return 0; 511 } 512 513 int MatDestroy_SeqBAIJ(PetscObject obj) 514 { 515 Mat A = (Mat) obj; 516 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 517 518 #if defined(PETSC_LOG) 519 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 520 #endif 521 PetscFree(a->a); 522 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 523 if (a->diag) PetscFree(a->diag); 524 if (a->ilen) PetscFree(a->ilen); 525 if (a->imax) PetscFree(a->imax); 526 if (a->solve_work) PetscFree(a->solve_work); 527 if (a->mult_work) PetscFree(a->mult_work); 528 PetscFree(a); 529 PLogObjectDestroy(A); 530 PetscHeaderDestroy(A); 531 return 0; 532 } 533 534 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 535 { 536 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 537 if (op == ROW_ORIENTED) a->roworiented = 1; 538 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 539 else if (op == COLUMNS_SORTED) a->sorted = 1; 540 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 541 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 542 else if (op == ROWS_SORTED || 543 op == SYMMETRIC_MATRIX || 544 op == STRUCTURALLY_SYMMETRIC_MATRIX || 545 op == YES_NEW_DIAGONALS) 546 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 547 else if (op == NO_NEW_DIAGONALS) 548 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 549 else 550 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 551 return 0; 552 } 553 554 555 /* -------------------------------------------------------*/ 556 /* Should check that shapes of vectors and matrices match */ 557 /* -------------------------------------------------------*/ 558 #include "pinclude/plapack.h" 559 560 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 561 { 562 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 563 Scalar *xg,*zg; 564 register Scalar *x,*z,*v,sum; 565 int mbs=a->mbs,i,*idx,*ii,n,ierr; 566 567 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 568 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 569 570 idx = a->j; 571 v = a->a; 572 ii = a->i; 573 574 for ( i=0; i<mbs; i++ ) { 575 n = ii[1] - ii[0]; ii++; 576 sum = 0.0; 577 while (n--) sum += *v++ * x[*idx++]; 578 z[i] = sum; 579 } 580 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 581 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 582 PLogFlops(2*a->nz - a->m); 583 return 0; 584 } 585 586 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 587 { 588 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 589 Scalar *xg,*zg; 590 register Scalar *x,*z,*v,*xb,sum1,sum2; 591 register Scalar x1,x2; 592 int mbs=a->mbs,i,*idx,*ii; 593 int j,n,ierr; 594 595 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 596 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 597 598 idx = a->j; 599 v = a->a; 600 ii = a->i; 601 602 for ( i=0; i<mbs; i++ ) { 603 n = ii[1] - ii[0]; ii++; 604 sum1 = 0.0; sum2 = 0.0; 605 for ( j=0; j<n; j++ ) { 606 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 607 sum1 += v[0]*x1 + v[2]*x2; 608 sum2 += v[1]*x1 + v[3]*x2; 609 v += 4; 610 } 611 z[0] = sum1; z[1] = sum2; 612 z += 2; 613 } 614 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 615 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 616 PLogFlops(4*a->nz - a->m); 617 return 0; 618 } 619 620 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 621 { 622 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 623 Scalar *xg,*zg; 624 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 625 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 626 627 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 628 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 629 630 idx = a->j; 631 v = a->a; 632 ii = a->i; 633 634 for ( i=0; i<mbs; i++ ) { 635 n = ii[1] - ii[0]; ii++; 636 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 637 for ( j=0; j<n; j++ ) { 638 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 639 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 640 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 641 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 642 v += 9; 643 } 644 z[0] = sum1; z[1] = sum2; z[2] = sum3; 645 z += 3; 646 } 647 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 648 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 649 PLogFlops(18*a->nz - a->m); 650 return 0; 651 } 652 653 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 654 { 655 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 656 Scalar *xg,*zg; 657 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 658 register Scalar x1,x2,x3,x4; 659 int mbs=a->mbs,i,*idx,*ii; 660 int j,n,ierr; 661 662 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 663 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 664 665 idx = a->j; 666 v = a->a; 667 ii = a->i; 668 669 for ( i=0; i<mbs; i++ ) { 670 n = ii[1] - ii[0]; ii++; 671 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 672 for ( j=0; j<n; j++ ) { 673 xb = x + 4*(*idx++); 674 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 675 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 676 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 677 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 678 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 679 v += 16; 680 } 681 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 682 z += 4; 683 } 684 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 685 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 686 PLogFlops(32*a->nz - a->m); 687 return 0; 688 } 689 690 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 691 { 692 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 693 Scalar *xg,*zg; 694 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 695 register Scalar x1,x2,x3,x4,x5; 696 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 697 698 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 699 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 700 701 idx = a->j; 702 v = a->a; 703 ii = a->i; 704 705 for ( i=0; i<mbs; i++ ) { 706 n = ii[1] - ii[0]; ii++; 707 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 708 for ( j=0; j<n; j++ ) { 709 xb = x + 5*(*idx++); 710 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 711 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 712 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 713 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 714 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 715 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 716 v += 25; 717 } 718 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 719 z += 5; 720 } 721 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 722 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 723 PLogFlops(50*a->nz - a->m); 724 return 0; 725 } 726 727 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 728 { 729 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 730 Scalar *xg,*zg; 731 register Scalar *x,*z,*v,*xb; 732 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 733 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt, _DZero = 0.0; 734 735 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 736 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 737 738 idx = a->j; 739 v = a->a; 740 ii = a->i; 741 742 743 if (!a->mult_work) { 744 k = PetscMax(a->m,a->n); 745 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 746 } 747 work = a->mult_work; 748 for ( i=0; i<mbs; i++ ) { 749 n = ii[1] - ii[0]; ii++; 750 ncols = n*bs; 751 workt = work; 752 for ( j=0; j<n; j++ ) { 753 xb = x + bs*(*idx++); 754 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 755 workt += bs; 756 } 757 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 758 v += n*bs2; 759 z += bs; 760 } 761 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 762 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 763 PLogFlops(2*a->nz*bs2 - a->m); 764 return 0; 765 } 766 767 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 768 { 769 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 770 Scalar *xg,*yg,*zg; 771 register Scalar *x,*y,*z,*v,sum; 772 int mbs=a->mbs,i,*idx,*ii,n,ierr; 773 774 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 775 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 776 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 777 778 idx = a->j; 779 v = a->a; 780 ii = a->i; 781 782 for ( i=0; i<mbs; i++ ) { 783 n = ii[1] - ii[0]; ii++; 784 sum = y[i]; 785 while (n--) sum += *v++ * x[*idx++]; 786 z[i] = sum; 787 } 788 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 789 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 790 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 791 PLogFlops(2*a->nz); 792 return 0; 793 } 794 795 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 796 { 797 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 798 Scalar *xg,*yg,*zg; 799 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 800 register Scalar x1,x2; 801 int mbs=a->mbs,i,*idx,*ii; 802 int j,n,ierr; 803 804 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 805 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 806 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 807 808 idx = a->j; 809 v = a->a; 810 ii = a->i; 811 812 for ( i=0; i<mbs; i++ ) { 813 n = ii[1] - ii[0]; ii++; 814 sum1 = y[0]; sum2 = y[1]; 815 for ( j=0; j<n; j++ ) { 816 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 817 sum1 += v[0]*x1 + v[2]*x2; 818 sum2 += v[1]*x1 + v[3]*x2; 819 v += 4; 820 } 821 z[0] = sum1; z[1] = sum2; 822 z += 2; y += 2; 823 } 824 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 825 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 826 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 827 PLogFlops(4*a->nz); 828 return 0; 829 } 830 831 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 832 { 833 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 834 Scalar *xg,*yg,*zg; 835 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 836 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 837 838 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 839 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 840 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 841 842 idx = a->j; 843 v = a->a; 844 ii = a->i; 845 846 for ( i=0; i<mbs; i++ ) { 847 n = ii[1] - ii[0]; ii++; 848 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 849 for ( j=0; j<n; j++ ) { 850 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 851 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 852 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 853 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 854 v += 9; 855 } 856 z[0] = sum1; z[1] = sum2; z[2] = sum3; 857 z += 3; y += 3; 858 } 859 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 860 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 861 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 862 PLogFlops(18*a->nz); 863 return 0; 864 } 865 866 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 867 { 868 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 869 Scalar *xg,*yg,*zg; 870 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 871 register Scalar x1,x2,x3,x4; 872 int mbs=a->mbs,i,*idx,*ii; 873 int j,n,ierr; 874 875 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 876 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 877 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 878 879 idx = a->j; 880 v = a->a; 881 ii = a->i; 882 883 for ( i=0; i<mbs; i++ ) { 884 n = ii[1] - ii[0]; ii++; 885 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 886 for ( j=0; j<n; j++ ) { 887 xb = x + 4*(*idx++); 888 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 889 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 890 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 891 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 892 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 893 v += 16; 894 } 895 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 896 z += 4; y += 4; 897 } 898 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 899 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 900 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 901 PLogFlops(32*a->nz); 902 return 0; 903 } 904 905 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 906 { 907 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 908 Scalar *xg,*yg,*zg; 909 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 910 register Scalar x1,x2,x3,x4,x5; 911 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 912 913 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 914 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 915 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 916 917 idx = a->j; 918 v = a->a; 919 ii = a->i; 920 921 for ( i=0; i<mbs; i++ ) { 922 n = ii[1] - ii[0]; ii++; 923 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 924 for ( j=0; j<n; j++ ) { 925 xb = x + 5*(*idx++); 926 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 927 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 928 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 929 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 930 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 931 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 932 v += 25; 933 } 934 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 935 z += 5; y += 5; 936 } 937 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 938 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 939 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 940 PLogFlops(50*a->nz); 941 return 0; 942 } 943 944 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 945 { 946 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 947 Scalar *xg,*zg; 948 register Scalar *x,*z,*v,*xb; 949 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 950 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 951 952 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 953 954 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 955 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 956 957 idx = a->j; 958 v = a->a; 959 ii = a->i; 960 961 962 if (!a->mult_work) { 963 k = PetscMax(a->m,a->n); 964 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 965 } 966 work = a->mult_work; 967 for ( i=0; i<mbs; i++ ) { 968 n = ii[1] - ii[0]; ii++; 969 ncols = n*bs; 970 workt = work; 971 for ( j=0; j<n; j++ ) { 972 xb = x + bs*(*idx++); 973 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 974 workt += bs; 975 } 976 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 977 v += n*bs2; 978 z += bs; 979 } 980 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 981 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 982 PLogFlops(2*a->nz*bs2 ); 983 return 0; 984 } 985 986 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 987 { 988 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 989 Scalar *xg,*zg,*zb; 990 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 991 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 992 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 993 994 995 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 996 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 997 PetscMemzero(z,N*sizeof(Scalar)); 998 999 idx = a->j; 1000 v = a->a; 1001 ii = a->i; 1002 1003 switch (bs) { 1004 case 1: 1005 for ( i=0; i<mbs; i++ ) { 1006 n = ii[1] - ii[0]; ii++; 1007 xb = x + i; x1 = xb[0]; 1008 ib = idx + ai[i]; 1009 for ( j=0; j<n; j++ ) { 1010 rval = ib[j]; 1011 z[rval] += *v++ * x1; 1012 } 1013 } 1014 break; 1015 case 2: 1016 for ( i=0; i<mbs; i++ ) { 1017 n = ii[1] - ii[0]; ii++; 1018 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1019 ib = idx + ai[i]; 1020 for ( j=0; j<n; j++ ) { 1021 rval = ib[j]*2; 1022 z[rval++] += v[0]*x1 + v[1]*x2; 1023 z[rval++] += v[2]*x1 + v[3]*x2; 1024 v += 4; 1025 } 1026 } 1027 break; 1028 case 3: 1029 for ( i=0; i<mbs; i++ ) { 1030 n = ii[1] - ii[0]; ii++; 1031 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1032 ib = idx + ai[i]; 1033 for ( j=0; j<n; j++ ) { 1034 rval = ib[j]*3; 1035 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1036 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1037 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1038 v += 9; 1039 } 1040 } 1041 break; 1042 case 4: 1043 for ( i=0; i<mbs; i++ ) { 1044 n = ii[1] - ii[0]; ii++; 1045 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1046 ib = idx + ai[i]; 1047 for ( j=0; j<n; j++ ) { 1048 rval = ib[j]*4; 1049 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1050 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1051 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1052 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1053 v += 16; 1054 } 1055 } 1056 break; 1057 case 5: 1058 for ( i=0; i<mbs; i++ ) { 1059 n = ii[1] - ii[0]; ii++; 1060 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1061 x4 = xb[3]; x5 = xb[4]; 1062 ib = idx + ai[i]; 1063 for ( j=0; j<n; j++ ) { 1064 rval = ib[j]*5; 1065 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1066 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1067 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1068 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1069 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1070 v += 25; 1071 } 1072 } 1073 break; 1074 /* block sizes larger then 5 by 5 are handled by BLAS */ 1075 default: { 1076 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1077 if (!a->mult_work) { 1078 k = PetscMax(a->m,a->n); 1079 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1080 CHKPTRQ(a->mult_work); 1081 } 1082 work = a->mult_work; 1083 for ( i=0; i<mbs; i++ ) { 1084 n = ii[1] - ii[0]; ii++; 1085 ncols = n*bs; 1086 PetscMemzero(work,ncols*sizeof(Scalar)); 1087 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1088 v += n*bs2; 1089 x += bs; 1090 workt = work; 1091 for ( j=0; j<n; j++ ) { 1092 zb = z + bs*(*idx++); 1093 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1094 workt += bs; 1095 } 1096 } 1097 } 1098 } 1099 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1100 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1101 PLogFlops(2*a->nz*a->bs2 - a->n); 1102 return 0; 1103 } 1104 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1105 { 1106 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1107 Scalar *xg,*zg,*zb; 1108 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1109 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1110 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1111 1112 1113 1114 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1115 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1116 1117 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1118 else PetscMemzero(z,N*sizeof(Scalar)); 1119 1120 1121 idx = a->j; 1122 v = a->a; 1123 ii = a->i; 1124 1125 switch (bs) { 1126 case 1: 1127 for ( i=0; i<mbs; i++ ) { 1128 n = ii[1] - ii[0]; ii++; 1129 xb = x + i; x1 = xb[0]; 1130 ib = idx + ai[i]; 1131 for ( j=0; j<n; j++ ) { 1132 rval = ib[j]; 1133 z[rval] += *v++ * x1; 1134 } 1135 } 1136 break; 1137 case 2: 1138 for ( i=0; i<mbs; i++ ) { 1139 n = ii[1] - ii[0]; ii++; 1140 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1141 ib = idx + ai[i]; 1142 for ( j=0; j<n; j++ ) { 1143 rval = ib[j]*2; 1144 z[rval++] += v[0]*x1 + v[1]*x2; 1145 z[rval++] += v[2]*x1 + v[3]*x2; 1146 v += 4; 1147 } 1148 } 1149 break; 1150 case 3: 1151 for ( i=0; i<mbs; i++ ) { 1152 n = ii[1] - ii[0]; ii++; 1153 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1154 ib = idx + ai[i]; 1155 for ( j=0; j<n; j++ ) { 1156 rval = ib[j]*3; 1157 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1158 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1159 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1160 v += 9; 1161 } 1162 } 1163 break; 1164 case 4: 1165 for ( i=0; i<mbs; i++ ) { 1166 n = ii[1] - ii[0]; ii++; 1167 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1168 ib = idx + ai[i]; 1169 for ( j=0; j<n; j++ ) { 1170 rval = ib[j]*4; 1171 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1172 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1173 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1174 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1175 v += 16; 1176 } 1177 } 1178 break; 1179 case 5: 1180 for ( i=0; i<mbs; i++ ) { 1181 n = ii[1] - ii[0]; ii++; 1182 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1183 x4 = xb[3]; x5 = xb[4]; 1184 ib = idx + ai[i]; 1185 for ( j=0; j<n; j++ ) { 1186 rval = ib[j]*5; 1187 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1188 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1189 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1190 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1191 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1192 v += 25; 1193 } 1194 } 1195 break; 1196 /* block sizes larger then 5 by 5 are handled by BLAS */ 1197 default: { 1198 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1199 if (!a->mult_work) { 1200 k = PetscMax(a->m,a->n); 1201 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1202 CHKPTRQ(a->mult_work); 1203 } 1204 work = a->mult_work; 1205 for ( i=0; i<mbs; i++ ) { 1206 n = ii[1] - ii[0]; ii++; 1207 ncols = n*bs; 1208 PetscMemzero(work,ncols*sizeof(Scalar)); 1209 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1210 v += n*bs2; 1211 x += bs; 1212 workt = work; 1213 for ( j=0; j<n; j++ ) { 1214 zb = z + bs*(*idx++); 1215 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1216 workt += bs; 1217 } 1218 } 1219 } 1220 } 1221 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1222 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1223 PLogFlops(2*a->nz*a->bs2); 1224 return 0; 1225 } 1226 1227 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 1228 { 1229 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1230 if (nz) *nz = a->bs2*a->nz; 1231 if (nza) *nza = a->maxnz; 1232 if (mem) *mem = (int)A->mem; 1233 return 0; 1234 } 1235 1236 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1237 { 1238 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1239 1240 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 1241 1242 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1243 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1244 (a->nz != b->nz)) { 1245 *flg = PETSC_FALSE; return 0; 1246 } 1247 1248 /* if the a->i are the same */ 1249 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1250 *flg = PETSC_FALSE; return 0; 1251 } 1252 1253 /* if a->j are the same */ 1254 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1255 *flg = PETSC_FALSE; return 0; 1256 } 1257 1258 /* if a->a are the same */ 1259 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1260 *flg = PETSC_FALSE; return 0; 1261 } 1262 *flg = PETSC_TRUE; 1263 return 0; 1264 1265 } 1266 1267 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1268 { 1269 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1270 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1271 Scalar *x, zero = 0.0,*aa,*aa_j; 1272 1273 bs = a->bs; 1274 aa = a->a; 1275 ai = a->i; 1276 aj = a->j; 1277 ambs = a->mbs; 1278 bs2 = a->bs2; 1279 1280 VecSet(&zero,v); 1281 VecGetArray(v,&x); VecGetLocalSize(v,&n); 1282 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 1283 for ( i=0; i<ambs; i++ ) { 1284 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1285 if (aj[j] == i) { 1286 row = i*bs; 1287 aa_j = aa+j*bs2; 1288 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1289 break; 1290 } 1291 } 1292 } 1293 return 0; 1294 } 1295 1296 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1297 { 1298 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1299 Scalar *l,*r,x,*v,*aa,*li,*ri; 1300 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1301 1302 ai = a->i; 1303 aj = a->j; 1304 aa = a->a; 1305 m = a->m; 1306 n = a->n; 1307 bs = a->bs; 1308 mbs = a->mbs; 1309 bs2 = a->bs2; 1310 if (ll) { 1311 VecGetArray(ll,&l); VecGetSize(ll,&lm); 1312 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 1313 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1314 M = ai[i+1] - ai[i]; 1315 li = l + i*bs; 1316 v = aa + bs2*ai[i]; 1317 for ( j=0; j<M; j++ ) { /* for each block */ 1318 for ( k=0; k<bs2; k++ ) { 1319 (*v++) *= li[k%bs]; 1320 } 1321 } 1322 } 1323 } 1324 1325 if (rr) { 1326 VecGetArray(rr,&r); VecGetSize(rr,&rn); 1327 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 1328 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1329 M = ai[i+1] - ai[i]; 1330 v = aa + bs2*ai[i]; 1331 for ( j=0; j<M; j++ ) { /* for each block */ 1332 ri = r + bs*aj[ai[i]+j]; 1333 for ( k=0; k<bs; k++ ) { 1334 x = ri[k]; 1335 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1336 } 1337 } 1338 } 1339 } 1340 return 0; 1341 } 1342 1343 1344 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1345 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1346 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1347 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1348 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1349 1350 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1351 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1352 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1353 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1354 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1355 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1356 1357 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1358 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1359 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1360 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1361 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1362 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1363 1364 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1365 { 1366 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1367 Scalar *v = a->a; 1368 double sum = 0.0; 1369 int i,nz=a->nz,bs2=a->bs2; 1370 1371 if (type == NORM_FROBENIUS) { 1372 for (i=0; i< bs2*nz; i++ ) { 1373 #if defined(PETSC_COMPLEX) 1374 sum += real(conj(*v)*(*v)); v++; 1375 #else 1376 sum += (*v)*(*v); v++; 1377 #endif 1378 } 1379 *norm = sqrt(sum); 1380 } 1381 else { 1382 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 1383 } 1384 return 0; 1385 } 1386 1387 /* 1388 note: This can only work for identity for row and col. It would 1389 be good to check this and otherwise generate an error. 1390 */ 1391 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1392 { 1393 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1394 Mat outA; 1395 int ierr; 1396 1397 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 1398 1399 outA = inA; 1400 inA->factor = FACTOR_LU; 1401 a->row = row; 1402 a->col = col; 1403 1404 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1405 1406 if (!a->diag) { 1407 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1408 } 1409 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1410 return 0; 1411 } 1412 1413 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1414 { 1415 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1416 int one = 1, totalnz = a->bs2*a->nz; 1417 BLscal_( &totalnz, alpha, a->a, &one ); 1418 PLogFlops(totalnz); 1419 return 0; 1420 } 1421 1422 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1423 { 1424 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1425 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1426 int *ai = a->i, *ailen = a->ilen; 1427 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1428 Scalar *ap, *aa = a->a, zero = 0.0; 1429 1430 for ( k=0; k<m; k++ ) { /* loop over rows */ 1431 row = im[k]; brow = row/bs; 1432 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 1433 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1434 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1435 nrow = ailen[brow]; 1436 for ( l=0; l<n; l++ ) { /* loop over columns */ 1437 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 1438 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1439 col = in[l] ; 1440 bcol = col/bs; 1441 cidx = col%bs; 1442 ridx = row%bs; 1443 high = nrow; 1444 low = 0; /* assume unsorted */ 1445 while (high-low > 5) { 1446 t = (low+high)/2; 1447 if (rp[t] > bcol) high = t; 1448 else low = t; 1449 } 1450 for ( i=low; i<high; i++ ) { 1451 if (rp[i] > bcol) break; 1452 if (rp[i] == bcol) { 1453 *v++ = ap[bs2*i+bs*cidx+ridx]; 1454 goto finished; 1455 } 1456 } 1457 *v++ = zero; 1458 finished:; 1459 } 1460 } 1461 return 0; 1462 } 1463 1464 /* -------------------------------------------------------------------*/ 1465 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1466 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1467 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1468 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1469 MatSolve_SeqBAIJ_N,0, 1470 0,0, 1471 MatLUFactor_SeqBAIJ,0, 1472 0, 1473 MatTranspose_SeqBAIJ, 1474 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1475 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1476 0,MatAssemblyEnd_SeqBAIJ, 1477 0, 1478 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 1479 MatGetReordering_SeqBAIJ, 1480 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1481 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1482 MatILUFactorSymbolic_SeqBAIJ,0, 1483 0,0,/* MatConvert_SeqBAIJ */ 0, 1484 MatGetSubMatrix_SeqBAIJ,0, 1485 MatConvertSameType_SeqBAIJ,0,0, 1486 MatILUFactor_SeqBAIJ,0,0, 1487 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 1488 MatGetValues_SeqBAIJ,0, 1489 0,MatScale_SeqBAIJ, 1490 0}; 1491 1492 /*@C 1493 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1494 compressed row) format. For good matrix assembly performance the 1495 user should preallocate the matrix storage by setting the parameter nz 1496 (or the array nzz). By setting these parameters accurately, performance 1497 during matrix assembly can be increased by more than a factor of 50. 1498 1499 Input Parameters: 1500 . comm - MPI communicator, set to MPI_COMM_SELF 1501 . bs - size of block 1502 . m - number of rows 1503 . n - number of columns 1504 . nz - number of block nonzeros per block row (same for all rows) 1505 . nzz - array containing the number of block nonzeros in the various block rows 1506 (possibly different for each block row) or PETSC_NULL 1507 1508 Output Parameter: 1509 . A - the matrix 1510 1511 Notes: 1512 The block AIJ format is fully compatible with standard Fortran 77 1513 storage. That is, the stored row and column indices can begin at 1514 either one (as in Fortran) or zero. See the users' manual for details. 1515 1516 Specify the preallocated storage with either nz or nnz (not both). 1517 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1518 allocation. For additional details, see the users manual chapter on 1519 matrices and the file $(PETSC_DIR)/Performance. 1520 1521 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1522 @*/ 1523 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1524 { 1525 Mat B; 1526 Mat_SeqBAIJ *b; 1527 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1528 1529 if (mbs*bs!=m || nbs*bs!=n) 1530 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 1531 1532 *A = 0; 1533 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1534 PLogObjectCreate(B); 1535 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1536 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1537 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1538 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1539 if (!flg) { 1540 switch (bs) { 1541 case 1: 1542 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1543 B->ops.solve = MatSolve_SeqBAIJ_1; 1544 B->ops.mult = MatMult_SeqBAIJ_1; 1545 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 1546 break; 1547 case 2: 1548 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1549 B->ops.solve = MatSolve_SeqBAIJ_2; 1550 B->ops.mult = MatMult_SeqBAIJ_2; 1551 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 1552 break; 1553 case 3: 1554 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1555 B->ops.solve = MatSolve_SeqBAIJ_3; 1556 B->ops.mult = MatMult_SeqBAIJ_3; 1557 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 1558 break; 1559 case 4: 1560 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1561 B->ops.solve = MatSolve_SeqBAIJ_4; 1562 B->ops.mult = MatMult_SeqBAIJ_4; 1563 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 1564 break; 1565 case 5: 1566 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1567 B->ops.solve = MatSolve_SeqBAIJ_5; 1568 B->ops.mult = MatMult_SeqBAIJ_5; 1569 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 1570 break; 1571 } 1572 } 1573 B->destroy = MatDestroy_SeqBAIJ; 1574 B->view = MatView_SeqBAIJ; 1575 B->factor = 0; 1576 B->lupivotthreshold = 1.0; 1577 b->row = 0; 1578 b->col = 0; 1579 b->reallocs = 0; 1580 1581 b->m = m; B->m = m; B->M = m; 1582 b->n = n; B->n = n; B->N = n; 1583 b->mbs = mbs; 1584 b->nbs = nbs; 1585 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1586 if (nnz == PETSC_NULL) { 1587 if (nz == PETSC_DEFAULT) nz = 5; 1588 else if (nz <= 0) nz = 1; 1589 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1590 nz = nz*mbs; 1591 } 1592 else { 1593 nz = 0; 1594 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1595 } 1596 1597 /* allocate the matrix space */ 1598 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1599 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1600 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1601 b->j = (int *) (b->a + nz*bs2); 1602 PetscMemzero(b->j,nz*sizeof(int)); 1603 b->i = b->j + nz; 1604 b->singlemalloc = 1; 1605 1606 b->i[0] = 0; 1607 for (i=1; i<mbs+1; i++) { 1608 b->i[i] = b->i[i-1] + b->imax[i-1]; 1609 } 1610 1611 /* b->ilen will count nonzeros in each block row so far. */ 1612 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1613 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1614 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1615 1616 b->bs = bs; 1617 b->bs2 = bs2; 1618 b->mbs = mbs; 1619 b->nz = 0; 1620 b->maxnz = nz*bs2; 1621 b->sorted = 0; 1622 b->roworiented = 1; 1623 b->nonew = 0; 1624 b->diag = 0; 1625 b->solve_work = 0; 1626 b->mult_work = 0; 1627 b->spptr = 0; 1628 *A = B; 1629 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1630 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1631 return 0; 1632 } 1633 1634 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1635 { 1636 Mat C; 1637 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1638 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1639 1640 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1641 1642 *B = 0; 1643 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1644 PLogObjectCreate(C); 1645 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1646 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1647 C->destroy = MatDestroy_SeqBAIJ; 1648 C->view = MatView_SeqBAIJ; 1649 C->factor = A->factor; 1650 c->row = 0; 1651 c->col = 0; 1652 C->assembled = PETSC_TRUE; 1653 1654 c->m = C->m = a->m; 1655 c->n = C->n = a->n; 1656 C->M = a->m; 1657 C->N = a->n; 1658 1659 c->bs = a->bs; 1660 c->bs2 = a->bs2; 1661 c->mbs = a->mbs; 1662 c->nbs = a->nbs; 1663 1664 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1665 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1666 for ( i=0; i<mbs; i++ ) { 1667 c->imax[i] = a->imax[i]; 1668 c->ilen[i] = a->ilen[i]; 1669 } 1670 1671 /* allocate the matrix space */ 1672 c->singlemalloc = 1; 1673 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1674 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1675 c->j = (int *) (c->a + nz*bs2); 1676 c->i = c->j + nz; 1677 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1678 if (mbs > 0) { 1679 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1680 if (cpvalues == COPY_VALUES) { 1681 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1682 } 1683 } 1684 1685 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1686 c->sorted = a->sorted; 1687 c->roworiented = a->roworiented; 1688 c->nonew = a->nonew; 1689 1690 if (a->diag) { 1691 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1692 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1693 for ( i=0; i<mbs; i++ ) { 1694 c->diag[i] = a->diag[i]; 1695 } 1696 } 1697 else c->diag = 0; 1698 c->nz = a->nz; 1699 c->maxnz = a->maxnz; 1700 c->solve_work = 0; 1701 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1702 c->mult_work = 0; 1703 *B = C; 1704 return 0; 1705 } 1706 1707 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1708 { 1709 Mat_SeqBAIJ *a; 1710 Mat B; 1711 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1712 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1713 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1714 int *masked, nmask,tmp,bs2,ishift; 1715 Scalar *aa; 1716 MPI_Comm comm = ((PetscObject) viewer)->comm; 1717 1718 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1719 bs2 = bs*bs; 1720 1721 MPI_Comm_size(comm,&size); 1722 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1723 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1724 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1725 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1726 M = header[1]; N = header[2]; nz = header[3]; 1727 1728 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1729 1730 /* 1731 This code adds extra rows to make sure the number of rows is 1732 divisible by the blocksize 1733 */ 1734 mbs = M/bs; 1735 extra_rows = bs - M + bs*(mbs); 1736 if (extra_rows == bs) extra_rows = 0; 1737 else mbs++; 1738 if (extra_rows) { 1739 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize"); 1740 } 1741 1742 /* read in row lengths */ 1743 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1744 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1745 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1746 1747 /* read in column indices */ 1748 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1749 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1750 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1751 1752 /* loop over row lengths determining block row lengths */ 1753 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1754 PetscMemzero(browlengths,mbs*sizeof(int)); 1755 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1756 PetscMemzero(mask,mbs*sizeof(int)); 1757 masked = mask + mbs; 1758 rowcount = 0; nzcount = 0; 1759 for ( i=0; i<mbs; i++ ) { 1760 nmask = 0; 1761 for ( j=0; j<bs; j++ ) { 1762 kmax = rowlengths[rowcount]; 1763 for ( k=0; k<kmax; k++ ) { 1764 tmp = jj[nzcount++]/bs; 1765 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1766 } 1767 rowcount++; 1768 } 1769 browlengths[i] += nmask; 1770 /* zero out the mask elements we set */ 1771 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1772 } 1773 1774 /* create our matrix */ 1775 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1776 CHKERRQ(ierr); 1777 B = *A; 1778 a = (Mat_SeqBAIJ *) B->data; 1779 1780 /* set matrix "i" values */ 1781 a->i[0] = 0; 1782 for ( i=1; i<= mbs; i++ ) { 1783 a->i[i] = a->i[i-1] + browlengths[i-1]; 1784 a->ilen[i-1] = browlengths[i-1]; 1785 } 1786 a->nz = 0; 1787 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1788 1789 /* read in nonzero values */ 1790 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1791 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1792 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1793 1794 /* set "a" and "j" values into matrix */ 1795 nzcount = 0; jcount = 0; 1796 for ( i=0; i<mbs; i++ ) { 1797 nzcountb = nzcount; 1798 nmask = 0; 1799 for ( j=0; j<bs; j++ ) { 1800 kmax = rowlengths[i*bs+j]; 1801 for ( k=0; k<kmax; k++ ) { 1802 tmp = jj[nzcount++]/bs; 1803 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1804 } 1805 rowcount++; 1806 } 1807 /* sort the masked values */ 1808 PetscSortInt(nmask,masked); 1809 1810 /* set "j" values into matrix */ 1811 maskcount = 1; 1812 for ( j=0; j<nmask; j++ ) { 1813 a->j[jcount++] = masked[j]; 1814 mask[masked[j]] = maskcount++; 1815 } 1816 /* set "a" values into matrix */ 1817 ishift = bs2*a->i[i]; 1818 for ( j=0; j<bs; j++ ) { 1819 kmax = rowlengths[i*bs+j]; 1820 for ( k=0; k<kmax; k++ ) { 1821 tmp = jj[nzcountb]/bs ; 1822 block = mask[tmp] - 1; 1823 point = jj[nzcountb] - bs*tmp; 1824 idx = ishift + bs2*block + j + bs*point; 1825 a->a[idx] = aa[nzcountb++]; 1826 } 1827 } 1828 /* zero out the mask elements we set */ 1829 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1830 } 1831 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1832 1833 PetscFree(rowlengths); 1834 PetscFree(browlengths); 1835 PetscFree(aa); 1836 PetscFree(jj); 1837 PetscFree(mask); 1838 1839 B->assembled = PETSC_TRUE; 1840 1841 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1842 if (flg) { 1843 Viewer tviewer; 1844 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1845 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1846 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1847 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1848 } 1849 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1850 if (flg) { 1851 Viewer tviewer; 1852 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1853 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1854 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1855 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1856 } 1857 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1858 if (flg) { 1859 Viewer tviewer; 1860 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1861 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1862 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1863 } 1864 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1865 if (flg) { 1866 Viewer tviewer; 1867 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1868 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1869 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1870 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1871 } 1872 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1873 if (flg) { 1874 Viewer tviewer; 1875 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1876 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1877 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1878 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1879 } 1880 return 0; 1881 } 1882 1883 1884 1885