1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.28 1996/04/05 21:52:51 balay Exp balay $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = a->indexshift; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja); 45 CHKERRQ(ierr); 46 ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 47 PetscFree(ia); PetscFree(ja); 48 } else { 49 if (ishift == oshift) { 50 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 51 } 52 else if (ishift == -1) { 53 /* temporarily subtract 1 from i and j indices */ 54 int nz = a->i[a->n] - 1; 55 for ( i=0; i<nz; i++ ) a->j[i]--; 56 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 57 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 58 for ( i=0; i<nz; i++ ) a->j[i]++; 59 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 60 } else { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[a->n] - 1; 63 for ( i=0; i<nz; i++ ) a->j[i]++; 64 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 65 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 66 for ( i=0; i<nz; i++ ) a->j[i]--; 67 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 68 } 69 } 70 return 0; 71 } 72 73 /* 74 Adds diagonal pointers to sparse matrix structure. 75 */ 76 77 int MatMarkDiag_SeqBAIJ(Mat A) 78 { 79 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 80 int i,j, *diag, m = a->mbs; 81 82 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 83 PLogObjectMemory(A,(m+1)*sizeof(int)); 84 for ( i=0; i<m; i++ ) { 85 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 86 if (a->j[j] == i) { 87 diag[i] = j; 88 break; 89 } 90 } 91 } 92 a->diag = diag; 93 return 0; 94 } 95 96 #include "draw.h" 97 #include "pinclude/pviewer.h" 98 #include "sys.h" 99 100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 101 { 102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 103 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs; 104 Scalar *aa; 105 106 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 107 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 108 col_lens[0] = MAT_COOKIE; 109 col_lens[1] = a->m; 110 col_lens[2] = a->n; 111 col_lens[3] = a->nz*bs2; 112 113 /* store lengths of each row and write (including header) to file */ 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118 } 119 } 120 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 121 PetscFree(col_lens); 122 123 /* store column indices (zero start index) */ 124 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 125 count = 0; 126 for ( i=0; i<a->mbs; i++ ) { 127 for ( j=0; j<bs; j++ ) { 128 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129 for ( l=0; l<bs; l++ ) { 130 jj[count++] = bs*a->j[k] + l; 131 } 132 } 133 } 134 } 135 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 136 PetscFree(jj); 137 138 /* store nonzero values */ 139 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 140 count = 0; 141 for ( i=0; i<a->mbs; i++ ) { 142 for ( j=0; j<bs; j++ ) { 143 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144 for ( l=0; l<bs; l++ ) { 145 aa[count++] = a->a[bs2*k + l*bs + j]; 146 } 147 } 148 } 149 } 150 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 151 PetscFree(aa); 152 return 0; 153 } 154 155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 156 { 157 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 158 int ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs; 159 FILE *fd; 160 char *outputname; 161 162 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 163 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 164 ierr = ViewerGetFormat(viewer,&format); 165 if (format == ASCII_FORMAT_INFO) { 166 /* no need to print additional information */ ; 167 } 168 else if (format == ASCII_FORMAT_MATLAB) { 169 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 170 } 171 else { 172 for ( i=0; i<a->mbs; i++ ) { 173 for ( j=0; j<bs; j++ ) { 174 fprintf(fd,"row %d:",i*bs+j); 175 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 176 for ( l=0; l<bs; l++ ) { 177 #if defined(PETSC_COMPLEX) 178 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 179 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 180 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 181 } 182 else { 183 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 184 } 185 #else 186 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 187 #endif 188 } 189 } 190 fprintf(fd,"\n"); 191 } 192 } 193 } 194 fflush(fd); 195 return 0; 196 } 197 198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 199 { 200 Mat A = (Mat) obj; 201 ViewerType vtype; 202 int ierr; 203 204 if (!viewer) { 205 viewer = STDOUT_VIEWER_SELF; 206 } 207 208 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 209 if (vtype == MATLAB_VIEWER) { 210 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 211 } 212 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 213 return MatView_SeqBAIJ_ASCII(A,viewer); 214 } 215 else if (vtype == BINARY_FILE_VIEWER) { 216 return MatView_SeqBAIJ_Binary(A,viewer); 217 } 218 else if (vtype == DRAW_VIEWER) { 219 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 220 } 221 return 0; 222 } 223 224 #define CHUNKSIZE 1 225 226 /* This version has row oriented v */ 227 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 228 { 229 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 230 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 231 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 232 int *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol; 233 int ridx,cidx,bs2=bs*bs; 234 Scalar *ap,value,*aa=a->a,*bap; 235 236 for ( k=0; k<m; k++ ) { /* loop over added rows */ 237 row = im[k]; brow = row/bs; 238 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 239 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 240 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 241 rmax = imax[brow]; nrow = ailen[brow]; 242 low = 0; 243 for ( l=0; l<n; l++ ) { /* loop over added columns */ 244 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 245 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 246 col = in[l] - shift; bcol = col/bs; 247 ridx = row % bs; cidx = col % bs; 248 if (roworiented) { 249 value = *v++; 250 } 251 else { 252 value = v[k + l*m]; 253 } 254 if (!sorted) low = 0; high = nrow; 255 while (high-low > 5) { 256 t = (low+high)/2; 257 if (rp[t] > bcol) high = t; 258 else low = t; 259 } 260 for ( i=low; i<high; i++ ) { 261 if (rp[i] > bcol) break; 262 if (rp[i] == bcol) { 263 bap = ap + bs2*i + bs*cidx + ridx; 264 if (is == ADD_VALUES) *bap += value; 265 else *bap = value; 266 goto noinsert; 267 } 268 } 269 if (nonew) goto noinsert; 270 if (nrow >= rmax) { 271 /* there is no extra room in row, therefore enlarge */ 272 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 273 Scalar *new_a; 274 275 /* malloc new storage space */ 276 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 277 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 278 new_j = (int *) (new_a + bs2*new_nz); 279 new_i = new_j + new_nz; 280 281 /* copy over old data into new slots */ 282 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 283 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 284 PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int)); 285 len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift); 286 PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow, 287 len*sizeof(int)); 288 PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar)); 289 PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 290 PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE), 291 aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar)); 292 /* free up old matrix storage */ 293 PetscFree(a->a); 294 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 295 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 296 a->singlemalloc = 1; 297 298 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 299 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 300 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 301 a->maxnz += bs2*CHUNKSIZE; 302 a->reallocs++; 303 a->nz+=bs2; 304 } 305 N = nrow++ - 1; 306 /* shift up all the later entries in this row */ 307 for ( ii=N; ii>=i; ii-- ) { 308 rp[ii+1] = rp[ii]; 309 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 310 } 311 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 312 rp[i] = bcol; 313 ap[bs2*i + bs*cidx + ridx] = value; 314 noinsert:; 315 low = i; 316 } 317 ailen[brow] = nrow; 318 } 319 return 0; 320 } 321 322 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 323 { 324 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 325 *m = a->m; *n = a->n; 326 return 0; 327 } 328 329 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 330 { 331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 332 *m = 0; *n = a->m; 333 return 0; 334 } 335 336 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 337 { 338 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 339 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 340 Scalar *aa,*v_i,*aa_i; 341 342 bs = a->bs; 343 ai = a->i; 344 aj = a->j; 345 aa = a->a; 346 bs2 = bs*bs; 347 348 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 349 350 bn = row/bs; /* Block number */ 351 bp = row % bs; /* Block Position */ 352 M = ai[bn+1] - ai[bn]; 353 *nz = bs*M; 354 355 if (v) { 356 *v = 0; 357 if (*nz) { 358 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 359 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 360 v_i = *v + i*bs; 361 aa_i = aa + bs2*(ai[bn] + i); 362 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 363 } 364 } 365 } 366 367 if (idx) { 368 *idx = 0; 369 if (*nz) { 370 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 371 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 372 idx_i = *idx + i*bs; 373 itmp = bs*aj[ai[bn] + i]; 374 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 375 } 376 } 377 } 378 return 0; 379 } 380 381 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 382 { 383 if (idx) {if (*idx) PetscFree(*idx);} 384 if (v) {if (*v) PetscFree(*v);} 385 return 0; 386 } 387 388 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 389 { 390 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 391 Mat C; 392 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 393 int shift=a->indexshift,*rows,*cols,bs2=bs*bs; 394 Scalar *array=a->a; 395 396 if (B==PETSC_NULL && mbs!=nbs) 397 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 398 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 399 PetscMemzero(col,(1+nbs)*sizeof(int)); 400 if (shift) { 401 for ( i=0; i<ai[mbs]-1; i++ ) aj[i] -= 1; 402 } 403 for ( i=0; i<ai[mbs]+shift; i++ ) col[aj[i]] += 1; 404 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 405 PetscFree(col); 406 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 407 cols = rows + bs; 408 for ( i=0; i<mbs; i++ ) { 409 cols[0] = i*bs; 410 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 411 len = ai[i+1] - ai[i]; 412 for ( j=0; j<len; j++ ) { 413 rows[0] = (*aj++)*bs; 414 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 415 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 416 array += bs2; 417 } 418 } 419 PetscFree(rows); 420 if (shift) { 421 for ( i=0; i<ai[mbs]-1; i++ ) aj[i] += 1; 422 } 423 424 ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 425 ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 426 427 if (B != PETSC_NULL) { 428 *B = C; 429 } else { 430 /* This isn't really an in-place transpose */ 431 PetscFree(a->a); 432 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 433 if (a->diag) PetscFree(a->diag); 434 if (a->ilen) PetscFree(a->ilen); 435 if (a->imax) PetscFree(a->imax); 436 if (a->solve_work) PetscFree(a->solve_work); 437 PetscFree(a); 438 PetscMemcpy(A,C,sizeof(struct _Mat)); 439 PetscHeaderDestroy(C); 440 } 441 return 0; 442 } 443 444 445 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 446 { 447 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 448 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 449 int m = a->m, *ip, N, *ailen = a->ilen,shift = a->indexshift; 450 int bs = a->bs, mbs = a->mbs, bs2 = bs*bs; 451 Scalar *aa = a->a, *ap; 452 453 if (mode == FLUSH_ASSEMBLY) return 0; 454 455 for ( i=1; i<mbs; i++ ) { 456 /* move each row back by the amount of empty slots (fshift) before it*/ 457 fshift += imax[i-1] - ailen[i-1]; 458 if (fshift) { 459 ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift; 460 N = ailen[i]; 461 for ( j=0; j<N; j++ ) { 462 ip[j-fshift] = ip[j]; 463 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 464 } 465 } 466 ai[i] = ai[i-1] + ailen[i-1]; 467 } 468 if (mbs) { 469 fshift += imax[mbs-1] - ailen[mbs-1]; 470 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 471 } 472 /* reset ilen and imax for each row */ 473 for ( i=0; i<mbs; i++ ) { 474 ailen[i] = imax[i] = ai[i+1] - ai[i]; 475 } 476 a->nz = (ai[m] + shift)*bs2; 477 478 /* diagonals may have moved, so kill the diagonal pointers */ 479 if (fshift && a->diag) { 480 PetscFree(a->diag); 481 PLogObjectMemory(A,-(m+1)*sizeof(int)); 482 a->diag = 0; 483 } 484 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n", 485 fshift,a->nz,m); 486 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n", 487 a->reallocs); 488 return 0; 489 } 490 491 static int MatZeroEntries_SeqBAIJ(Mat A) 492 { 493 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 494 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 495 return 0; 496 } 497 498 int MatDestroy_SeqBAIJ(PetscObject obj) 499 { 500 Mat A = (Mat) obj; 501 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 502 503 #if defined(PETSC_LOG) 504 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 505 #endif 506 PetscFree(a->a); 507 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 508 if (a->diag) PetscFree(a->diag); 509 if (a->ilen) PetscFree(a->ilen); 510 if (a->imax) PetscFree(a->imax); 511 if (a->solve_work) PetscFree(a->solve_work); 512 if (a->mult_work) PetscFree(a->mult_work); 513 PetscFree(a); 514 PLogObjectDestroy(A); 515 PetscHeaderDestroy(A); 516 return 0; 517 } 518 519 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 520 { 521 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 522 if (op == ROW_ORIENTED) a->roworiented = 1; 523 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 524 else if (op == COLUMNS_SORTED) a->sorted = 1; 525 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 526 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 527 else if (op == ROWS_SORTED || 528 op == SYMMETRIC_MATRIX || 529 op == STRUCTURALLY_SYMMETRIC_MATRIX || 530 op == YES_NEW_DIAGONALS) 531 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 532 else if (op == NO_NEW_DIAGONALS) 533 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 534 else 535 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 536 return 0; 537 } 538 539 540 /* -------------------------------------------------------*/ 541 /* Should check that shapes of vectors and matrices match */ 542 /* -------------------------------------------------------*/ 543 #include "pinclude/plapack.h" 544 545 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 546 { 547 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 548 Scalar *xg,*yg; 549 register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 550 register Scalar x1,x2,x3,x4,x5; 551 int mbs = a->mbs, m = a->m, i, *idx,*ii; 552 int bs = a->bs,j,n,bs2 = bs*bs; 553 554 VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 555 PetscMemzero(y,m*sizeof(Scalar)); 556 x = x; 557 idx = a->j; 558 v = a->a; 559 ii = a->i; 560 561 switch (bs) { 562 case 1: 563 for ( i=0; i<m; i++ ) { 564 n = ii[1] - ii[0]; ii++; 565 sum = 0.0; 566 while (n--) sum += *v++ * x[*idx++]; 567 y[i] = sum; 568 } 569 break; 570 case 2: 571 for ( i=0; i<mbs; i++ ) { 572 n = ii[1] - ii[0]; ii++; 573 sum1 = 0.0; sum2 = 0.0; 574 for ( j=0; j<n; j++ ) { 575 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 576 sum1 += v[0]*x1 + v[2]*x2; 577 sum2 += v[1]*x1 + v[3]*x2; 578 v += 4; 579 } 580 y[0] += sum1; y[1] += sum2; 581 y += 2; 582 } 583 break; 584 case 3: 585 for ( i=0; i<mbs; i++ ) { 586 n = ii[1] - ii[0]; ii++; 587 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 588 for ( j=0; j<n; j++ ) { 589 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 590 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 591 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 592 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 593 v += 9; 594 } 595 y[0] += sum1; y[1] += sum2; y[2] += sum3; 596 y += 3; 597 } 598 break; 599 case 4: 600 for ( i=0; i<mbs; i++ ) { 601 n = ii[1] - ii[0]; ii++; 602 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 603 for ( j=0; j<n; j++ ) { 604 xb = x + 4*(*idx++); 605 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 606 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 607 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 608 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 609 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 610 v += 16; 611 } 612 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 613 y += 4; 614 } 615 break; 616 case 5: 617 for ( i=0; i<mbs; i++ ) { 618 n = ii[1] - ii[0]; ii++; 619 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 620 for ( j=0; j<n; j++ ) { 621 xb = x + 5*(*idx++); 622 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 623 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 624 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 625 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 626 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 627 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 628 v += 25; 629 } 630 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 631 y += 5; 632 } 633 break; 634 /* block sizes larger then 5 by 5 are handled by BLAS */ 635 default: { 636 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 637 if (!a->mult_work) { 638 a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 639 CHKPTRQ(a->mult_work); 640 } 641 work = a->mult_work; 642 for ( i=0; i<mbs; i++ ) { 643 n = ii[1] - ii[0]; ii++; 644 ncols = n*bs; 645 workt = work; 646 for ( j=0; j<n; j++ ) { 647 xb = x + bs*(*idx++); 648 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 649 workt += bs; 650 } 651 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 652 v += n*bs2; 653 y += bs; 654 } 655 } 656 } 657 PLogFlops(2*bs2*a->nz - m); 658 return 0; 659 } 660 661 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 662 { 663 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 664 if (nz) *nz = a->bs*a->bs*a->nz; 665 if (nza) *nza = a->maxnz; 666 if (mem) *mem = (int)A->mem; 667 return 0; 668 } 669 670 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 671 { 672 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 673 674 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 675 676 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 677 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 678 (a->nz != b->nz)||(a->indexshift != b->indexshift)) { 679 *flg = PETSC_FALSE; return 0; 680 } 681 682 /* if the a->i are the same */ 683 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 684 *flg = PETSC_FALSE; return 0; 685 } 686 687 /* if a->j are the same */ 688 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 689 *flg = PETSC_FALSE; return 0; 690 } 691 692 /* if a->a are the same */ 693 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 694 *flg = PETSC_FALSE; return 0; 695 } 696 *flg = PETSC_TRUE; 697 return 0; 698 699 } 700 701 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 702 { 703 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 704 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 705 Scalar *x, zero = 0.0,*aa,*aa_j; 706 707 bs = a->bs; 708 aa = a->a; 709 ai = a->i; 710 aj = a->j; 711 ambs = a->mbs; 712 bs2 = bs*bs; 713 714 VecSet(&zero,v); 715 VecGetArray(v,&x); VecGetLocalSize(v,&n); 716 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 717 for ( i=0; i<ambs; i++ ) { 718 for ( j=ai[i]; j<ai[i+1]; j++ ) { 719 if (aj[j] == i) { 720 row = i*bs; 721 aa_j = aa+j*bs2; 722 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 723 break; 724 } 725 } 726 } 727 return 0; 728 } 729 730 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 731 { 732 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 733 Scalar *l,*r,x,*v,*aa,*li,*ri; 734 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 735 736 ai = a->i; 737 aj = a->j; 738 aa = a->a; 739 m = a->m; 740 n = a->n; 741 bs = a->bs; 742 mbs = a->mbs; 743 bs2 = bs*bs; 744 if (ll) { 745 VecGetArray(ll,&l); VecGetSize(ll,&lm); 746 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 747 for ( i=0; i<mbs; i++ ) { /* for each block row */ 748 M = ai[i+1] - ai[i]; 749 li = l + i*bs; 750 v = aa + bs2*ai[i]; 751 for ( j=0; j<M; j++ ) { /* for each block */ 752 for ( k=0; k<bs2; k++ ) { 753 (*v++) *= li[k%bs]; 754 } 755 } 756 } 757 } 758 759 if (rr) { 760 VecGetArray(rr,&r); VecGetSize(rr,&rn); 761 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 762 for ( i=0; i<mbs; i++ ) { /* for each block row */ 763 M = ai[i+1] - ai[i]; 764 v = aa + bs2*ai[i]; 765 for ( j=0; j<M; j++ ) { /* for each block */ 766 ri = r + bs*aj[ai[i]+j]; 767 for ( k=0; k<bs; k++ ) { 768 x = ri[k]; 769 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 770 } 771 } 772 } 773 } 774 return 0; 775 } 776 777 778 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 779 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 780 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 781 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 782 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 783 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 784 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 785 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 786 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 787 788 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 789 { 790 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 791 Scalar *v = a->a; 792 double sum = 0.0; 793 int i; 794 795 if (type == NORM_FROBENIUS) { 796 for (i=0; i<a->nz; i++ ) { 797 #if defined(PETSC_COMPLEX) 798 sum += real(conj(*v)*(*v)); v++; 799 #else 800 sum += (*v)*(*v); v++; 801 #endif 802 } 803 *norm = sqrt(sum); 804 } 805 else { 806 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 807 } 808 return 0; 809 } 810 811 /* 812 note: This can only work for identity for row and col. It would 813 be good to check this and otherwise generate an error. 814 */ 815 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 816 { 817 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 818 Mat outA; 819 int ierr; 820 821 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 822 823 outA = inA; 824 inA->factor = FACTOR_LU; 825 a->row = row; 826 a->col = col; 827 828 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 829 830 if (!a->diag) { 831 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 832 } 833 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 834 return 0; 835 } 836 837 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 838 { 839 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 840 int one = 1, totalnz = a->bs*a->bs*a->nz; 841 BLscal_( &totalnz, alpha, a->a, &one ); 842 PLogFlops(totalnz); 843 return 0; 844 } 845 846 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 847 { 848 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 849 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 850 int *ai = a->i, *ailen = a->ilen, shift = a->indexshift; 851 int brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs; 852 Scalar *ap, *aa = a->a, zero = 0.0; 853 854 for ( k=0; k<m; k++ ) { /* loop over rows */ 855 row = im[k]; brow = row/bs; 856 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 857 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 858 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 859 nrow = ailen[brow]; 860 for ( l=0; l<n; l++ ) { /* loop over columns */ 861 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 862 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 863 col = in[l] - shift; 864 bcol = col/bs; 865 cidx = col%bs; 866 ridx = row%bs; 867 high = nrow; 868 low = 0; /* assume unsorted */ 869 while (high-low > 5) { 870 t = (low+high)/2; 871 if (rp[t] > bcol) high = t; 872 else low = t; 873 } 874 for ( i=low; i<high; i++ ) { 875 if (rp[i] > bcol) break; 876 if (rp[i] == bcol) { 877 *v++ = ap[bs2*i+bs*cidx+ridx]; 878 goto finished; 879 } 880 } 881 *v++ = zero; 882 finished:; 883 } 884 } 885 return 0; 886 } 887 888 int MatPrintHelp_SeqBAIJ(Mat A) 889 { 890 static int called = 0; 891 892 if (called) return 0; else called = 1; 893 return 0; 894 } 895 /* -------------------------------------------------------------------*/ 896 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 897 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 898 MatMult_SeqBAIJ,0, 899 0,0, 900 MatSolve_SeqBAIJ,0, 901 0,0, 902 MatLUFactor_SeqBAIJ,0, 903 0, 904 MatTranspose_SeqBAIJ, 905 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 906 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 907 0,MatAssemblyEnd_SeqBAIJ, 908 0, 909 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 910 MatGetReordering_SeqBAIJ, 911 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 912 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 913 MatILUFactorSymbolic_SeqBAIJ,0, 914 0,0,/* MatConvert_SeqBAIJ */ 0, 915 0,0, 916 MatConvertSameType_SeqBAIJ,0,0, 917 MatILUFactor_SeqBAIJ,0,0, 918 0,0, 919 MatGetValues_SeqBAIJ,0, 920 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 921 0}; 922 923 /*@C 924 MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format 925 (the default parallel PETSc format). For good matrix assembly performance 926 the user should preallocate the matrix storage by setting the parameter nz 927 (or nzz). By setting these parameters accurately, performance can be 928 increased by more than a factor of 50. 929 930 Input Parameters: 931 . comm - MPI communicator, set to MPI_COMM_SELF 932 . bs - size of block 933 . m - number of rows 934 . n - number of columns 935 . nz - number of block nonzeros per block row (same for all rows) 936 . nzz - number of block nonzeros per block row or PETSC_NULL 937 (possibly different for each row) 938 939 Output Parameter: 940 . A - the matrix 941 942 Notes: 943 The BAIJ format, is fully compatible with standard Fortran 77 944 storage. That is, the stored row and column indices can begin at 945 either one (as in Fortran) or zero. See the users manual for details. 946 947 Specify the preallocated storage with either nz or nnz (not both). 948 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 949 allocation. For additional details, see the users manual chapter on 950 matrices and the file $(PETSC_DIR)/Performance. 951 952 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues() 953 @*/ 954 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 955 { 956 Mat B; 957 Mat_SeqBAIJ *b; 958 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 959 960 if (mbs*bs!=m || nbs*bs!=n) 961 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 962 963 *A = 0; 964 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 965 PLogObjectCreate(B); 966 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 967 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 968 switch (bs) { 969 case 1: 970 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 971 break; 972 case 2: 973 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 974 break; 975 case 3: 976 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 977 break; 978 case 4: 979 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 980 break; 981 case 5: 982 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 983 break; 984 } 985 B->destroy = MatDestroy_SeqBAIJ; 986 B->view = MatView_SeqBAIJ; 987 B->factor = 0; 988 B->lupivotthreshold = 1.0; 989 b->row = 0; 990 b->col = 0; 991 b->reallocs = 0; 992 993 b->m = m; 994 b->mbs = mbs; 995 b->n = n; 996 b->nbs = nbs; 997 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 998 if (nnz == PETSC_NULL) { 999 if (nz == PETSC_DEFAULT) nz = CHUNKSIZE; 1000 else if (nz <= 0) nz = 1; 1001 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1002 nz = nz*mbs; 1003 } 1004 else { 1005 nz = 0; 1006 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1007 } 1008 1009 /* allocate the matrix space */ 1010 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1011 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1012 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1013 b->j = (int *) (b->a + nz*bs2); 1014 PetscMemzero(b->j,nz*sizeof(int)); 1015 b->i = b->j + nz; 1016 b->singlemalloc = 1; 1017 1018 b->i[0] = 0; 1019 for (i=1; i<mbs+1; i++) { 1020 b->i[i] = b->i[i-1] + b->imax[i-1]; 1021 } 1022 1023 /* b->ilen will count nonzeros in each block row so far. */ 1024 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1025 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1026 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1027 1028 b->bs = bs; 1029 b->mbs = mbs; 1030 b->nz = 0; 1031 b->maxnz = nz; 1032 b->sorted = 0; 1033 b->roworiented = 1; 1034 b->nonew = 0; 1035 b->diag = 0; 1036 b->solve_work = 0; 1037 b->mult_work = 0; 1038 b->spptr = 0; 1039 b->indexshift = 0; 1040 *A = B; 1041 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1042 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1043 return 0; 1044 } 1045 1046 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1047 { 1048 Mat C; 1049 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1050 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs; 1051 1052 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1053 1054 *B = 0; 1055 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1056 PLogObjectCreate(C); 1057 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1058 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1059 C->destroy = MatDestroy_SeqBAIJ; 1060 C->view = MatView_SeqBAIJ; 1061 C->factor = A->factor; 1062 c->row = 0; 1063 c->col = 0; 1064 C->assembled = PETSC_TRUE; 1065 1066 c->m = a->m; 1067 c->n = a->n; 1068 c->bs = a->bs; 1069 c->mbs = a->mbs; 1070 c->nbs = a->nbs; 1071 c->indexshift = 0; 1072 1073 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1074 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1075 for ( i=0; i<mbs; i++ ) { 1076 c->imax[i] = a->imax[i]; 1077 c->ilen[i] = a->ilen[i]; 1078 } 1079 1080 /* allocate the matrix space */ 1081 c->singlemalloc = 1; 1082 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1083 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1084 c->j = (int *) (c->a + nz*bs2); 1085 c->i = c->j + nz; 1086 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1087 if (mbs > 0) { 1088 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1089 if (cpvalues == COPY_VALUES) { 1090 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1091 } 1092 } 1093 1094 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1095 c->sorted = a->sorted; 1096 c->roworiented = a->roworiented; 1097 c->nonew = a->nonew; 1098 1099 if (a->diag) { 1100 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1101 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1102 for ( i=0; i<mbs; i++ ) { 1103 c->diag[i] = a->diag[i]; 1104 } 1105 } 1106 else c->diag = 0; 1107 c->nz = a->nz; 1108 c->maxnz = a->maxnz; 1109 c->solve_work = 0; 1110 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1111 c->mult_work = 0; 1112 *B = C; 1113 return 0; 1114 } 1115 1116 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1117 { 1118 Mat_SeqBAIJ *a; 1119 Mat B; 1120 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1121 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1122 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1123 int *masked, nmask,tmp,ishift,bs2; 1124 Scalar *aa; 1125 MPI_Comm comm = ((PetscObject) viewer)->comm; 1126 1127 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 1128 bs2 = bs*bs; 1129 1130 MPI_Comm_size(comm,&size); 1131 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1132 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1133 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1134 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1135 M = header[1]; N = header[2]; nz = header[3]; 1136 1137 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1138 1139 /* 1140 This code adds extra rows to make sure the number of rows is 1141 divisible by the blocksize 1142 */ 1143 mbs = M/bs; 1144 extra_rows = bs - M + bs*(mbs); 1145 if (extra_rows == bs) extra_rows = 0; 1146 else mbs++; 1147 if (extra_rows) { 1148 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 1149 } 1150 1151 /* read in row lengths */ 1152 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1153 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1154 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1155 1156 /* read in column indices */ 1157 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1158 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1159 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1160 1161 /* loop over row lengths determining block row lengths */ 1162 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1163 PetscMemzero(browlengths,mbs*sizeof(int)); 1164 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1165 PetscMemzero(mask,mbs*sizeof(int)); 1166 masked = mask + mbs; 1167 rowcount = 0; nzcount = 0; 1168 for ( i=0; i<mbs; i++ ) { 1169 nmask = 0; 1170 for ( j=0; j<bs; j++ ) { 1171 kmax = rowlengths[rowcount]; 1172 for ( k=0; k<kmax; k++ ) { 1173 tmp = jj[nzcount++]/bs; 1174 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1175 } 1176 rowcount++; 1177 } 1178 browlengths[i] += nmask; 1179 /* zero out the mask elements we set */ 1180 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1181 } 1182 1183 /* create our matrix */ 1184 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1185 CHKERRQ(ierr); 1186 B = *A; 1187 a = (Mat_SeqBAIJ *) B->data; 1188 1189 /* set matrix "i" values */ 1190 a->i[0] = 0; 1191 for ( i=1; i<= mbs; i++ ) { 1192 a->i[i] = a->i[i-1] + browlengths[i-1]; 1193 a->ilen[i-1] = browlengths[i-1]; 1194 } 1195 a->indexshift = 0; 1196 a->nz = 0; 1197 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1198 1199 /* read in nonzero values */ 1200 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1201 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1202 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1203 1204 /* set "a" and "j" values into matrix */ 1205 nzcount = 0; jcount = 0; 1206 for ( i=0; i<mbs; i++ ) { 1207 nzcountb = nzcount; 1208 nmask = 0; 1209 for ( j=0; j<bs; j++ ) { 1210 kmax = rowlengths[i*bs+j]; 1211 for ( k=0; k<kmax; k++ ) { 1212 tmp = jj[nzcount++]/bs; 1213 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1214 } 1215 rowcount++; 1216 } 1217 /* sort the masked values */ 1218 PetscSortInt(nmask,masked); 1219 1220 /* set "j" values into matrix */ 1221 maskcount = 1; 1222 for ( j=0; j<nmask; j++ ) { 1223 a->j[jcount++] = masked[j]; 1224 mask[masked[j]] = maskcount++; 1225 } 1226 /* set "a" values into matrix */ 1227 ishift = bs2*a->i[i]; 1228 for ( j=0; j<bs; j++ ) { 1229 kmax = rowlengths[i*bs+j]; 1230 for ( k=0; k<kmax; k++ ) { 1231 tmp = jj[nzcountb]/bs ; 1232 block = mask[tmp] - 1; 1233 point = jj[nzcountb] - bs*tmp; 1234 idx = ishift + bs2*block + j + bs*point; 1235 a->a[idx] = aa[nzcountb++]; 1236 } 1237 } 1238 /* zero out the mask elements we set */ 1239 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1240 } 1241 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1242 1243 PetscFree(rowlengths); 1244 PetscFree(browlengths); 1245 PetscFree(aa); 1246 PetscFree(jj); 1247 PetscFree(mask); 1248 1249 B->assembled = PETSC_TRUE; 1250 1251 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1252 if (flg) { 1253 Viewer tviewer; 1254 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1255 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1256 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1257 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1258 } 1259 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1260 if (flg) { 1261 Viewer tviewer; 1262 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1263 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1264 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1265 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1266 } 1267 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1268 if (flg) { 1269 Viewer tviewer; 1270 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1271 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1272 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1273 } 1274 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1275 if (flg) { 1276 Viewer tviewer; 1277 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1278 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1279 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1280 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1281 } 1282 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1283 if (flg) { 1284 Viewer tviewer; 1285 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1286 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1287 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1288 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1289 } 1290 return 0; 1291 } 1292 1293 1294 1295