1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int s_nz = a->i[n]; 63 for (i=0; i<s_nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int s_nz = a->i[n]-1; 84 for (i=0; i<s_nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 int MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 int ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 129 if (a->inew){ 130 ierr = PetscFree(a->inew);CHKERRQ(ierr); 131 a->inew = 0; 132 } 133 ierr = PetscFree(a);CHKERRQ(ierr); 134 PetscFunctionReturn(0); 135 } 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 139 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 140 { 141 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 142 143 PetscFunctionBegin; 144 switch (op) { 145 case MAT_ROW_ORIENTED: 146 a->roworiented = PETSC_TRUE; 147 break; 148 case MAT_COLUMN_ORIENTED: 149 a->roworiented = PETSC_FALSE; 150 break; 151 case MAT_COLUMNS_SORTED: 152 a->sorted = PETSC_TRUE; 153 break; 154 case MAT_COLUMNS_UNSORTED: 155 a->sorted = PETSC_FALSE; 156 break; 157 case MAT_KEEP_ZEROED_ROWS: 158 a->keepzeroedrows = PETSC_TRUE; 159 break; 160 case MAT_NO_NEW_NONZERO_LOCATIONS: 161 a->nonew = 1; 162 break; 163 case MAT_NEW_NONZERO_LOCATION_ERR: 164 a->nonew = -1; 165 break; 166 case MAT_NEW_NONZERO_ALLOCATION_ERR: 167 a->nonew = -2; 168 break; 169 case MAT_YES_NEW_NONZERO_LOCATIONS: 170 a->nonew = 0; 171 break; 172 case MAT_ROWS_SORTED: 173 case MAT_ROWS_UNSORTED: 174 case MAT_YES_NEW_DIAGONALS: 175 case MAT_IGNORE_OFF_PROC_ENTRIES: 176 case MAT_USE_HASH_TABLE: 177 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 178 break; 179 case MAT_NO_NEW_DIAGONALS: 180 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 181 default: 182 SETERRQ(PETSC_ERR_SUP,"unknown option"); 183 } 184 PetscFunctionReturn(0); 185 } 186 187 #undef __FUNCT__ 188 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 189 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 190 { 191 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 192 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 193 MatScalar *aa,*aa_i; 194 PetscScalar *v_i; 195 196 PetscFunctionBegin; 197 bs = a->bs; 198 ai = a->i; 199 aj = a->j; 200 aa = a->a; 201 bs2 = a->bs2; 202 203 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 204 205 bn = row/bs; /* Block number */ 206 bp = row % bs; /* Block position */ 207 M = ai[bn+1] - ai[bn]; 208 *ncols = bs*M; 209 210 if (v) { 211 *v = 0; 212 if (*ncols) { 213 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 214 for (i=0; i<M; i++) { /* for each block in the block row */ 215 v_i = *v + i*bs; 216 aa_i = aa + bs2*(ai[bn] + i); 217 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 218 } 219 } 220 } 221 222 if (cols) { 223 *cols = 0; 224 if (*ncols) { 225 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 226 for (i=0; i<M; i++) { /* for each block in the block row */ 227 cols_i = *cols + i*bs; 228 itmp = bs*aj[ai[bn] + i]; 229 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 230 } 231 } 232 } 233 234 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 235 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 236 #ifdef column_search 237 v_i = *v + M*bs; 238 cols_i = *cols + M*bs; 239 for (i=0; i<bn; i++){ /* for each block row */ 240 M = ai[i+1] - ai[i]; 241 for (j=0; j<M; j++){ 242 itmp = aj[ai[i] + j]; /* block column value */ 243 if (itmp == bn){ 244 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 245 for (k=0; k<bs; k++) { 246 *cols_i++ = i*bs+k; 247 *v_i++ = aa_i[k]; 248 } 249 *ncols += bs; 250 break; 251 } 252 } 253 } 254 #endif 255 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 261 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 262 { 263 int ierr; 264 265 PetscFunctionBegin; 266 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 267 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 273 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 274 { 275 int ierr; 276 PetscFunctionBegin; 277 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 278 PetscFunctionReturn(0); 279 } 280 281 #undef __FUNCT__ 282 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 283 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 284 { 285 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 286 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 287 PetscScalar *aa; 288 FILE *file; 289 290 PetscFunctionBegin; 291 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 292 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 293 col_lens[0] = MAT_FILE_COOKIE; 294 295 col_lens[1] = A->m; 296 col_lens[2] = A->m; 297 col_lens[3] = a->s_nz*bs2; 298 299 /* store lengths of each row and write (including header) to file */ 300 count = 0; 301 for (i=0; i<a->mbs; i++) { 302 for (j=0; j<bs; j++) { 303 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 304 } 305 } 306 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 307 ierr = PetscFree(col_lens);CHKERRQ(ierr); 308 309 /* store column indices (zero start index) */ 310 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 311 count = 0; 312 for (i=0; i<a->mbs; i++) { 313 for (j=0; j<bs; j++) { 314 for (k=a->i[i]; k<a->i[i+1]; k++) { 315 for (l=0; l<bs; l++) { 316 jj[count++] = bs*a->j[k] + l; 317 } 318 } 319 } 320 } 321 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 322 ierr = PetscFree(jj);CHKERRQ(ierr); 323 324 /* store nonzero values */ 325 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 326 count = 0; 327 for (i=0; i<a->mbs; i++) { 328 for (j=0; j<bs; j++) { 329 for (k=a->i[i]; k<a->i[i+1]; k++) { 330 for (l=0; l<bs; l++) { 331 aa[count++] = a->a[bs2*k + l*bs + j]; 332 } 333 } 334 } 335 } 336 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 337 ierr = PetscFree(aa);CHKERRQ(ierr); 338 339 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 340 if (file) { 341 fprintf(file,"-matload_block_size %d\n",a->bs); 342 } 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 348 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 349 { 350 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 351 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 352 char *name; 353 PetscViewerFormat format; 354 355 PetscFunctionBegin; 356 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 357 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 358 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 359 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 360 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 361 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 362 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 363 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 364 for (i=0; i<a->mbs; i++) { 365 for (j=0; j<bs; j++) { 366 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 367 for (k=a->i[i]; k<a->i[i+1]; k++) { 368 for (l=0; l<bs; l++) { 369 #if defined(PETSC_USE_COMPLEX) 370 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 371 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 372 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 373 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 374 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 375 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 376 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 377 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 378 } 379 #else 380 if (a->a[bs2*k + l*bs + j] != 0.0) { 381 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 382 } 383 #endif 384 } 385 } 386 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 387 } 388 } 389 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 390 } else { 391 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 392 for (i=0; i<a->mbs; i++) { 393 for (j=0; j<bs; j++) { 394 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 395 for (k=a->i[i]; k<a->i[i+1]; k++) { 396 for (l=0; l<bs; l++) { 397 #if defined(PETSC_USE_COMPLEX) 398 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 399 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 400 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 401 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 402 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 403 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 404 } else { 405 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 406 } 407 #else 408 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 409 #endif 410 } 411 } 412 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 413 } 414 } 415 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 416 } 417 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 423 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 424 { 425 Mat A = (Mat) Aa; 426 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 427 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 428 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 429 MatScalar *aa; 430 MPI_Comm comm; 431 PetscViewer viewer; 432 433 PetscFunctionBegin; 434 /* 435 This is nasty. If this is called from an originally parallel matrix 436 then all processes call this,but only the first has the matrix so the 437 rest should return immediately. 438 */ 439 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 440 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 441 if (rank) PetscFunctionReturn(0); 442 443 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 444 445 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 446 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 447 448 /* loop over matrix elements drawing boxes */ 449 color = PETSC_DRAW_BLUE; 450 for (i=0,row=0; i<mbs; i++,row+=bs) { 451 for (j=a->i[i]; j<a->i[i+1]; j++) { 452 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 453 x_l = a->j[j]*bs; x_r = x_l + 1.0; 454 aa = a->a + j*bs2; 455 for (k=0; k<bs; k++) { 456 for (l=0; l<bs; l++) { 457 if (PetscRealPart(*aa++) >= 0.) continue; 458 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 459 } 460 } 461 } 462 } 463 color = PETSC_DRAW_CYAN; 464 for (i=0,row=0; i<mbs; i++,row+=bs) { 465 for (j=a->i[i]; j<a->i[i+1]; j++) { 466 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 467 x_l = a->j[j]*bs; x_r = x_l + 1.0; 468 aa = a->a + j*bs2; 469 for (k=0; k<bs; k++) { 470 for (l=0; l<bs; l++) { 471 if (PetscRealPart(*aa++) != 0.) continue; 472 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 473 } 474 } 475 } 476 } 477 478 color = PETSC_DRAW_RED; 479 for (i=0,row=0; i<mbs; i++,row+=bs) { 480 for (j=a->i[i]; j<a->i[i+1]; j++) { 481 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 482 x_l = a->j[j]*bs; x_r = x_l + 1.0; 483 aa = a->a + j*bs2; 484 for (k=0; k<bs; k++) { 485 for (l=0; l<bs; l++) { 486 if (PetscRealPart(*aa++) <= 0.) continue; 487 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 488 } 489 } 490 } 491 } 492 PetscFunctionReturn(0); 493 } 494 495 #undef __FUNCT__ 496 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 497 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 498 { 499 int ierr; 500 PetscReal xl,yl,xr,yr,w,h; 501 PetscDraw draw; 502 PetscTruth isnull; 503 504 PetscFunctionBegin; 505 506 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 507 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 508 509 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 510 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 511 xr += w; yr += h; xl = -w; yl = -h; 512 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 513 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 514 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 515 PetscFunctionReturn(0); 516 } 517 518 #undef __FUNCT__ 519 #define __FUNCT__ "MatView_SeqSBAIJ" 520 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 521 { 522 int ierr; 523 PetscTruth issocket,isascii,isbinary,isdraw; 524 525 PetscFunctionBegin; 526 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 527 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 528 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 529 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 530 if (issocket) { 531 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 532 } else if (isascii){ 533 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 534 } else if (isbinary) { 535 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 536 } else if (isdraw) { 537 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 538 } else { 539 SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name); 540 } 541 PetscFunctionReturn(0); 542 } 543 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 547 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 548 { 549 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 550 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 551 int *ai = a->i,*ailen = a->ilen; 552 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 553 MatScalar *ap,*aa = a->a,zero = 0.0; 554 555 PetscFunctionBegin; 556 for (k=0; k<m; k++) { /* loop over rows */ 557 row = im[k]; brow = row/bs; 558 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 559 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 560 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 561 nrow = ailen[brow]; 562 for (l=0; l<n; l++) { /* loop over columns */ 563 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 564 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 565 col = in[l] ; 566 bcol = col/bs; 567 cidx = col%bs; 568 ridx = row%bs; 569 high = nrow; 570 low = 0; /* assume unsorted */ 571 while (high-low > 5) { 572 t = (low+high)/2; 573 if (rp[t] > bcol) high = t; 574 else low = t; 575 } 576 for (i=low; i<high; i++) { 577 if (rp[i] > bcol) break; 578 if (rp[i] == bcol) { 579 *v++ = ap[bs2*i+bs*cidx+ridx]; 580 goto finished; 581 } 582 } 583 *v++ = zero; 584 finished:; 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 591 #undef __FUNCT__ 592 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 593 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 594 { 595 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 596 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 597 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 598 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 599 PetscTruth roworiented=a->roworiented; 600 MatScalar *value = v,*ap,*aa = a->a,*bap; 601 602 PetscFunctionBegin; 603 if (roworiented) { 604 stepval = (n-1)*bs; 605 } else { 606 stepval = (m-1)*bs; 607 } 608 for (k=0; k<m; k++) { /* loop over added rows */ 609 row = im[k]; 610 if (row < 0) continue; 611 #if defined(PETSC_USE_BOPT_g) 612 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 613 #endif 614 rp = aj + ai[row]; 615 ap = aa + bs2*ai[row]; 616 rmax = imax[row]; 617 nrow = ailen[row]; 618 low = 0; 619 for (l=0; l<n; l++) { /* loop over added columns */ 620 if (in[l] < 0) continue; 621 #if defined(PETSC_USE_BOPT_g) 622 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 623 #endif 624 col = in[l]; 625 if (col < row) continue; /* ignore lower triangular block */ 626 if (roworiented) { 627 value = v + k*(stepval+bs)*bs + l*bs; 628 } else { 629 value = v + l*(stepval+bs)*bs + k*bs; 630 } 631 if (!sorted) low = 0; high = nrow; 632 while (high-low > 7) { 633 t = (low+high)/2; 634 if (rp[t] > col) high = t; 635 else low = t; 636 } 637 for (i=low; i<high; i++) { 638 if (rp[i] > col) break; 639 if (rp[i] == col) { 640 bap = ap + bs2*i; 641 if (roworiented) { 642 if (is == ADD_VALUES) { 643 for (ii=0; ii<bs; ii++,value+=stepval) { 644 for (jj=ii; jj<bs2; jj+=bs) { 645 bap[jj] += *value++; 646 } 647 } 648 } else { 649 for (ii=0; ii<bs; ii++,value+=stepval) { 650 for (jj=ii; jj<bs2; jj+=bs) { 651 bap[jj] = *value++; 652 } 653 } 654 } 655 } else { 656 if (is == ADD_VALUES) { 657 for (ii=0; ii<bs; ii++,value+=stepval) { 658 for (jj=0; jj<bs; jj++) { 659 *bap++ += *value++; 660 } 661 } 662 } else { 663 for (ii=0; ii<bs; ii++,value+=stepval) { 664 for (jj=0; jj<bs; jj++) { 665 *bap++ = *value++; 666 } 667 } 668 } 669 } 670 goto noinsert2; 671 } 672 } 673 if (nonew == 1) goto noinsert2; 674 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 675 if (nrow >= rmax) { 676 /* there is no extra room in row, therefore enlarge */ 677 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 678 MatScalar *new_a; 679 680 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 681 682 /* malloc new storage space */ 683 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 684 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 685 new_j = (int*)(new_a + bs2*new_nz); 686 new_i = new_j + new_nz; 687 688 /* copy over old data into new slots */ 689 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 690 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 691 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 692 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 693 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 694 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 695 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 696 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 697 /* free up old matrix storage */ 698 ierr = PetscFree(a->a);CHKERRQ(ierr); 699 if (!a->singlemalloc) { 700 ierr = PetscFree(a->i);CHKERRQ(ierr); 701 ierr = PetscFree(a->j);CHKERRQ(ierr); 702 } 703 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 704 a->singlemalloc = PETSC_TRUE; 705 706 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 707 rmax = imax[row] = imax[row] + CHUNKSIZE; 708 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 709 a->s_maxnz += bs2*CHUNKSIZE; 710 a->reallocs++; 711 a->s_nz++; 712 } 713 N = nrow++ - 1; 714 /* shift up all the later entries in this row */ 715 for (ii=N; ii>=i; ii--) { 716 rp[ii+1] = rp[ii]; 717 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 718 } 719 if (N >= i) { 720 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 721 } 722 rp[i] = col; 723 bap = ap + bs2*i; 724 if (roworiented) { 725 for (ii=0; ii<bs; ii++,value+=stepval) { 726 for (jj=ii; jj<bs2; jj+=bs) { 727 bap[jj] = *value++; 728 } 729 } 730 } else { 731 for (ii=0; ii<bs; ii++,value+=stepval) { 732 for (jj=0; jj<bs; jj++) { 733 *bap++ = *value++; 734 } 735 } 736 } 737 noinsert2:; 738 low = i; 739 } 740 ailen[row] = nrow; 741 } 742 PetscFunctionReturn(0); 743 } 744 745 #undef __FUNCT__ 746 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 747 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 748 { 749 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 750 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 751 int m = A->m,*ip,N,*ailen = a->ilen; 752 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 753 MatScalar *aa = a->a,*ap; 754 #if defined(PETSC_HAVE_SPOOLES) 755 PetscTruth flag; 756 #endif 757 758 PetscFunctionBegin; 759 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 760 761 if (m) rmax = ailen[0]; 762 for (i=1; i<mbs; i++) { 763 /* move each row back by the amount of empty slots (fshift) before it*/ 764 fshift += imax[i-1] - ailen[i-1]; 765 rmax = PetscMax(rmax,ailen[i]); 766 if (fshift) { 767 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 768 N = ailen[i]; 769 for (j=0; j<N; j++) { 770 ip[j-fshift] = ip[j]; 771 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 772 } 773 } 774 ai[i] = ai[i-1] + ailen[i-1]; 775 } 776 if (mbs) { 777 fshift += imax[mbs-1] - ailen[mbs-1]; 778 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 779 } 780 /* reset ilen and imax for each row */ 781 for (i=0; i<mbs; i++) { 782 ailen[i] = imax[i] = ai[i+1] - ai[i]; 783 } 784 a->s_nz = ai[mbs]; 785 786 /* diagonals may have moved, reset it */ 787 if (a->diag) { 788 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 789 } 790 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 791 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 792 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 793 a->reallocs); 794 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 795 a->reallocs = 0; 796 A->info.nz_unneeded = (PetscReal)fshift*bs2; 797 798 #if defined(PETSC_HAVE_SPOOLES) 799 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 800 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); } 801 #endif 802 803 PetscFunctionReturn(0); 804 } 805 806 /* 807 This function returns an array of flags which indicate the locations of contiguous 808 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 809 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 810 Assume: sizes should be long enough to hold all the values. 811 */ 812 #undef __FUNCT__ 813 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 814 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 815 { 816 int i,j,k,row; 817 PetscTruth flg; 818 819 PetscFunctionBegin; 820 for (i=0,j=0; i<n; j++) { 821 row = idx[i]; 822 if (row%bs!=0) { /* Not the begining of a block */ 823 sizes[j] = 1; 824 i++; 825 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 826 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 827 i++; 828 } else { /* Begining of the block, so check if the complete block exists */ 829 flg = PETSC_TRUE; 830 for (k=1; k<bs; k++) { 831 if (row+k != idx[i+k]) { /* break in the block */ 832 flg = PETSC_FALSE; 833 break; 834 } 835 } 836 if (flg == PETSC_TRUE) { /* No break in the bs */ 837 sizes[j] = bs; 838 i+= bs; 839 } else { 840 sizes[j] = 1; 841 i++; 842 } 843 } 844 } 845 *bs_max = j; 846 PetscFunctionReturn(0); 847 } 848 849 #undef __FUNCT__ 850 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 851 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 852 { 853 PetscFunctionBegin; 854 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 855 } 856 857 /* Only add/insert a(i,j) with i<=j (blocks). 858 Any a(i,j) with i>j input by user is ingored. 859 */ 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 863 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 864 { 865 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 866 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 867 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 868 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 869 int ridx,cidx,bs2=a->bs2,ierr; 870 MatScalar *ap,value,*aa=a->a,*bap; 871 872 PetscFunctionBegin; 873 874 for (k=0; k<m; k++) { /* loop over added rows */ 875 row = im[k]; /* row number */ 876 brow = row/bs; /* block row number */ 877 if (row < 0) continue; 878 #if defined(PETSC_USE_BOPT_g) 879 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 880 #endif 881 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 882 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 883 rmax = imax[brow]; /* maximum space allocated for this row */ 884 nrow = ailen[brow]; /* actual length of this row */ 885 low = 0; 886 887 for (l=0; l<n; l++) { /* loop over added columns */ 888 if (in[l] < 0) continue; 889 #if defined(PETSC_USE_BOPT_g) 890 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 891 #endif 892 col = in[l]; 893 bcol = col/bs; /* block col number */ 894 895 if (brow <= bcol){ 896 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 897 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 898 /* element value a(k,l) */ 899 if (roworiented) { 900 value = v[l + k*n]; 901 } else { 902 value = v[k + l*m]; 903 } 904 905 /* move pointer bap to a(k,l) quickly and add/insert value */ 906 if (!sorted) low = 0; high = nrow; 907 while (high-low > 7) { 908 t = (low+high)/2; 909 if (rp[t] > bcol) high = t; 910 else low = t; 911 } 912 for (i=low; i<high; i++) { 913 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 914 if (rp[i] > bcol) break; 915 if (rp[i] == bcol) { 916 bap = ap + bs2*i + bs*cidx + ridx; 917 if (is == ADD_VALUES) *bap += value; 918 else *bap = value; 919 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 920 if (brow == bcol && ridx < cidx){ 921 bap = ap + bs2*i + bs*ridx + cidx; 922 if (is == ADD_VALUES) *bap += value; 923 else *bap = value; 924 } 925 goto noinsert1; 926 } 927 } 928 929 if (nonew == 1) goto noinsert1; 930 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 931 if (nrow >= rmax) { 932 /* there is no extra room in row, therefore enlarge */ 933 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 934 MatScalar *new_a; 935 936 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 937 938 /* Malloc new storage space */ 939 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 940 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 941 new_j = (int*)(new_a + bs2*new_nz); 942 new_i = new_j + new_nz; 943 944 /* copy over old data into new slots */ 945 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 946 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 947 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 948 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 949 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 950 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 951 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 952 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 953 /* free up old matrix storage */ 954 ierr = PetscFree(a->a);CHKERRQ(ierr); 955 if (!a->singlemalloc) { 956 ierr = PetscFree(a->i);CHKERRQ(ierr); 957 ierr = PetscFree(a->j);CHKERRQ(ierr); 958 } 959 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 960 a->singlemalloc = PETSC_TRUE; 961 962 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 963 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 964 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 965 a->s_maxnz += bs2*CHUNKSIZE; 966 a->reallocs++; 967 a->s_nz++; 968 } 969 970 N = nrow++ - 1; 971 /* shift up all the later entries in this row */ 972 for (ii=N; ii>=i; ii--) { 973 rp[ii+1] = rp[ii]; 974 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 975 } 976 if (N>=i) { 977 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 978 } 979 rp[i] = bcol; 980 ap[bs2*i + bs*cidx + ridx] = value; 981 noinsert1:; 982 low = i; 983 /* } */ 984 } 985 } /* end of if .. if.. */ 986 } /* end of loop over added columns */ 987 ailen[brow] = nrow; 988 } /* end of loop over added rows */ 989 990 PetscFunctionReturn(0); 991 } 992 993 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 994 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 995 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 996 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 997 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 998 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 999 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 1000 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 1001 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 1002 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 1003 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 1004 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 1005 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 1006 extern int MatZeroEntries_SeqSBAIJ(Mat); 1007 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 1008 1009 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 1010 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 1011 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 1012 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 1013 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 1014 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 1015 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 1016 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 1017 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 1018 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 1019 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 1020 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 1021 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 1022 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 1023 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 1024 1025 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 1026 1027 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 1028 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 1029 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 1030 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1031 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 1032 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 1033 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 1034 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 1035 1036 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 1038 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 1039 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 1044 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 1045 1046 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 1047 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 1048 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 1049 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 1050 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1051 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1052 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1053 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1054 1055 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1056 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1057 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1058 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1059 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1060 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1061 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1062 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1063 1064 #ifdef HAVE_MatICCFactor 1065 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1068 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1069 { 1070 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1071 Mat outA; 1072 int ierr; 1073 PetscTruth row_identity,col_identity; 1074 1075 PetscFunctionBegin; 1076 /* 1077 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1078 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1079 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1080 if (!row_identity || !col_identity) { 1081 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1082 } 1083 */ 1084 1085 outA = inA; 1086 inA->factor = FACTOR_CHOLESKY; 1087 1088 if (!a->diag) { 1089 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1090 } 1091 /* 1092 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1093 for ILU(0) factorization with natural ordering 1094 */ 1095 switch (a->bs) { 1096 case 1: 1097 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1098 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1099 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1100 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1101 case 2: 1102 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1103 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1104 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1105 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1106 break; 1107 case 3: 1108 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1109 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1110 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1111 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1112 break; 1113 case 4: 1114 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1115 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1116 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1117 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1118 break; 1119 case 5: 1120 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1121 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1122 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1123 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1124 break; 1125 case 6: 1126 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1127 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1128 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1129 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1130 break; 1131 case 7: 1132 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1133 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1134 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1135 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1136 break; 1137 default: 1138 a->row = row; 1139 a->icol = col; 1140 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1141 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1142 1143 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1144 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1145 PetscLogObjectParent(inA,a->icol); 1146 1147 if (!a->solve_work) { 1148 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1149 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1150 } 1151 } 1152 1153 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1154 1155 PetscFunctionReturn(0); 1156 } 1157 #endif 1158 1159 #undef __FUNCT__ 1160 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1161 int MatPrintHelp_SeqSBAIJ(Mat A) 1162 { 1163 static PetscTruth called = PETSC_FALSE; 1164 MPI_Comm comm = A->comm; 1165 int ierr; 1166 1167 PetscFunctionBegin; 1168 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1169 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1170 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 EXTERN_C_BEGIN 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1177 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1178 { 1179 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1180 int i,nz,n; 1181 1182 PetscFunctionBegin; 1183 nz = baij->s_maxnz; 1184 n = mat->n; 1185 for (i=0; i<nz; i++) { 1186 baij->j[i] = indices[i]; 1187 } 1188 baij->s_nz = nz; 1189 for (i=0; i<n; i++) { 1190 baij->ilen[i] = baij->imax[i]; 1191 } 1192 1193 PetscFunctionReturn(0); 1194 } 1195 EXTERN_C_END 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1199 /*@ 1200 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1201 in the matrix. 1202 1203 Input Parameters: 1204 + mat - the SeqSBAIJ matrix 1205 - indices - the column indices 1206 1207 Level: advanced 1208 1209 Notes: 1210 This can be called if you have precomputed the nonzero structure of the 1211 matrix and want to provide it to the matrix object to improve the performance 1212 of the MatSetValues() operation. 1213 1214 You MUST have set the correct numbers of nonzeros per row in the call to 1215 MatCreateSeqSBAIJ(). 1216 1217 MUST be called before any calls to MatSetValues() 1218 1219 .seealso: MatCreateSeqSBAIJ 1220 @*/ 1221 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1222 { 1223 int ierr,(*f)(Mat,int *); 1224 1225 PetscFunctionBegin; 1226 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1227 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1228 if (f) { 1229 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1230 } else { 1231 SETERRQ(1,"Wrong type of matrix to set column indices"); 1232 } 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1238 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1239 { 1240 int ierr; 1241 1242 PetscFunctionBegin; 1243 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1244 PetscFunctionReturn(0); 1245 } 1246 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1249 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar **array) 1250 { 1251 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1252 PetscFunctionBegin; 1253 *array = a->a; 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1259 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar **array) 1260 { 1261 PetscFunctionBegin; 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #include "petscblaslapack.h" 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1268 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1269 { 1270 int ierr,one=1,i; 1271 Mat_SeqSBAIJ *x = (Mat_SeqSBAIJ *)X->data,*y = (Mat_SeqSBAIJ *)Y->data; 1272 int *xtoy,nz,row,xcol,ycol,jx,jy,*xi=x->i,*xj=x->j,*yi=y->i,*yj=y->j; 1273 1274 PetscFunctionBegin; 1275 if (str == SAME_NONZERO_PATTERN) { 1276 BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one); 1277 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1278 if (x->bs > 1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not support for bs>1"); 1279 ierr = PetscMalloc(x->s_nz*sizeof(int),&xtoy);CHKERRQ(ierr); 1280 i = 0; 1281 for (row=0; row<x->mbs; row++){ 1282 nz = xi[1] - xi[0]; 1283 jy = 0; 1284 for (jx=0; jx<nz; jx++,jy++){ 1285 xcol = xj[*xi + jx]; 1286 ycol = yj[*yi + jy]; /* col index for y */ 1287 while ( ycol < xcol ) { 1288 jy++; 1289 ycol = yj[*yi + jy]; 1290 } 1291 if (xcol != ycol) SETERRQ2(PETSC_ERR_ARG_WRONG,"X matrix entry (%d,%d) is not in Y matrix",row,ycol); 1292 xtoy[i++] = *yi + jy; 1293 } 1294 xi++; yi++; 1295 } 1296 1297 for (i=0; i<x->s_nz; i++) { 1298 y->a[xtoy[i]] += (*a)*(x->a[i]); 1299 } 1300 ierr = PetscFree(xtoy);CHKERRQ(ierr); 1301 } else { 1302 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1303 } 1304 PetscFunctionReturn(0); 1305 } 1306 1307 /* -------------------------------------------------------------------*/ 1308 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1309 MatGetRow_SeqSBAIJ, 1310 MatRestoreRow_SeqSBAIJ, 1311 MatMult_SeqSBAIJ_N, 1312 MatMultAdd_SeqSBAIJ_N, 1313 MatMultTranspose_SeqSBAIJ, 1314 MatMultTransposeAdd_SeqSBAIJ, 1315 MatSolve_SeqSBAIJ_N, 1316 0, 1317 0, 1318 0, 1319 0, 1320 MatCholeskyFactor_SeqSBAIJ, 1321 MatRelax_SeqSBAIJ, 1322 MatTranspose_SeqSBAIJ, 1323 MatGetInfo_SeqSBAIJ, 1324 MatEqual_SeqSBAIJ, 1325 MatGetDiagonal_SeqSBAIJ, 1326 MatDiagonalScale_SeqSBAIJ, 1327 MatNorm_SeqSBAIJ, 1328 0, 1329 MatAssemblyEnd_SeqSBAIJ, 1330 0, 1331 MatSetOption_SeqSBAIJ, 1332 MatZeroEntries_SeqSBAIJ, 1333 MatZeroRows_SeqSBAIJ, 1334 0, 1335 0, 1336 MatCholeskyFactorSymbolic_SeqSBAIJ, 1337 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1338 MatSetUpPreallocation_SeqSBAIJ, 1339 0, 1340 MatICCFactorSymbolic_SeqSBAIJ, 1341 MatGetArray_SeqSBAIJ, 1342 MatRestoreArray_SeqSBAIJ, 1343 MatDuplicate_SeqSBAIJ, 1344 0, 1345 0, 1346 0, 1347 0, 1348 MatAXPY_SeqSBAIJ, 1349 MatGetSubMatrices_SeqSBAIJ, 1350 MatIncreaseOverlap_SeqSBAIJ, 1351 MatGetValues_SeqSBAIJ, 1352 0, 1353 MatPrintHelp_SeqSBAIJ, 1354 MatScale_SeqSBAIJ, 1355 0, 1356 0, 1357 0, 1358 MatGetBlockSize_SeqSBAIJ, 1359 MatGetRowIJ_SeqSBAIJ, 1360 MatRestoreRowIJ_SeqSBAIJ, 1361 0, 1362 0, 1363 0, 1364 0, 1365 0, 1366 0, 1367 MatSetValuesBlocked_SeqSBAIJ, 1368 MatGetSubMatrix_SeqSBAIJ, 1369 0, 1370 0, 1371 MatGetPetscMaps_Petsc, 1372 0, 1373 0, 1374 0, 1375 0, 1376 0, 1377 0, 1378 MatGetRowMax_SeqSBAIJ, 1379 0, 1380 0, 1381 0, 1382 0, 1383 0, 1384 0, 1385 0, 1386 0, 1387 0, 1388 0, 1389 0, 1390 0, 1391 0, 1392 #if !defined(PETSC_USE_COMPLEX) 1393 MatGetInertia_SeqSBAIJ 1394 #else 1395 0 1396 #endif 1397 }; 1398 1399 EXTERN_C_BEGIN 1400 #undef __FUNCT__ 1401 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1402 int MatStoreValues_SeqSBAIJ(Mat mat) 1403 { 1404 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1405 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1406 int ierr; 1407 1408 PetscFunctionBegin; 1409 if (aij->nonew != 1) { 1410 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1411 } 1412 1413 /* allocate space for values if not already there */ 1414 if (!aij->saved_values) { 1415 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1416 } 1417 1418 /* copy values over */ 1419 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } 1422 EXTERN_C_END 1423 1424 EXTERN_C_BEGIN 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1427 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1428 { 1429 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1430 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1431 1432 PetscFunctionBegin; 1433 if (aij->nonew != 1) { 1434 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1435 } 1436 if (!aij->saved_values) { 1437 SETERRQ(1,"Must call MatStoreValues(A);first"); 1438 } 1439 1440 /* copy values over */ 1441 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1442 PetscFunctionReturn(0); 1443 } 1444 EXTERN_C_END 1445 1446 EXTERN_C_BEGIN 1447 #undef __FUNCT__ 1448 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1449 int MatCreate_SeqSBAIJ(Mat B) 1450 { 1451 Mat_SeqSBAIJ *b; 1452 int ierr,size; 1453 1454 PetscFunctionBegin; 1455 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1456 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1457 B->m = B->M = PetscMax(B->m,B->M); 1458 B->n = B->N = PetscMax(B->n,B->N); 1459 1460 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1461 B->data = (void*)b; 1462 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1463 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1464 B->ops->destroy = MatDestroy_SeqSBAIJ; 1465 B->ops->view = MatView_SeqSBAIJ; 1466 B->factor = 0; 1467 B->lupivotthreshold = 1.0; 1468 B->mapping = 0; 1469 b->row = 0; 1470 b->icol = 0; 1471 b->reallocs = 0; 1472 b->saved_values = 0; 1473 1474 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1475 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1476 1477 b->sorted = PETSC_FALSE; 1478 b->roworiented = PETSC_TRUE; 1479 b->nonew = 0; 1480 b->diag = 0; 1481 b->solve_work = 0; 1482 b->mult_work = 0; 1483 B->spptr = 0; 1484 b->keepzeroedrows = PETSC_FALSE; 1485 1486 b->inew = 0; 1487 b->jnew = 0; 1488 b->anew = 0; 1489 b->a2anew = 0; 1490 b->permute = PETSC_FALSE; 1491 1492 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1493 "MatStoreValues_SeqSBAIJ", 1494 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1495 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1496 "MatRetrieveValues_SeqSBAIJ", 1497 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1498 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1499 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1500 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1501 PetscFunctionReturn(0); 1502 } 1503 EXTERN_C_END 1504 1505 #undef __FUNCT__ 1506 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1507 /*@C 1508 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1509 compressed row) format. For good matrix assembly performance the 1510 user should preallocate the matrix storage by setting the parameter nz 1511 (or the array nnz). By setting these parameters accurately, performance 1512 during matrix assembly can be increased by more than a factor of 50. 1513 1514 Collective on Mat 1515 1516 Input Parameters: 1517 + A - the symmetric matrix 1518 . bs - size of block 1519 . nz - number of block nonzeros per block row (same for all rows) 1520 - nnz - array containing the number of block nonzeros in the upper triangular plus 1521 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1522 1523 Options Database Keys: 1524 . -mat_no_unroll - uses code that does not unroll the loops in the 1525 block calculations (much slower) 1526 . -mat_block_size - size of the blocks to use 1527 1528 Level: intermediate 1529 1530 Notes: 1531 Specify the preallocated storage with either nz or nnz (not both). 1532 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1533 allocation. For additional details, see the users manual chapter on 1534 matrices. 1535 1536 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1537 @*/ 1538 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1539 { 1540 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1541 int i,len,ierr,mbs,bs2; 1542 PetscTruth flg; 1543 int s_nz; 1544 1545 PetscFunctionBegin; 1546 B->preallocated = PETSC_TRUE; 1547 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1548 mbs = B->m/bs; 1549 bs2 = bs*bs; 1550 1551 if (mbs*bs != B->m) { 1552 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1553 } 1554 1555 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1556 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1557 if (nnz) { 1558 for (i=0; i<mbs; i++) { 1559 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1560 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1561 } 1562 } 1563 1564 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1565 if (!flg) { 1566 switch (bs) { 1567 case 1: 1568 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1569 B->ops->solve = MatSolve_SeqSBAIJ_1; 1570 B->ops->solves = MatSolves_SeqSBAIJ_1; 1571 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1572 B->ops->mult = MatMult_SeqSBAIJ_1; 1573 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1574 break; 1575 case 2: 1576 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1577 B->ops->solve = MatSolve_SeqSBAIJ_2; 1578 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1579 B->ops->mult = MatMult_SeqSBAIJ_2; 1580 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1581 break; 1582 case 3: 1583 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1584 B->ops->solve = MatSolve_SeqSBAIJ_3; 1585 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1586 B->ops->mult = MatMult_SeqSBAIJ_3; 1587 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1588 break; 1589 case 4: 1590 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1591 B->ops->solve = MatSolve_SeqSBAIJ_4; 1592 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1593 B->ops->mult = MatMult_SeqSBAIJ_4; 1594 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1595 break; 1596 case 5: 1597 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1598 B->ops->solve = MatSolve_SeqSBAIJ_5; 1599 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1600 B->ops->mult = MatMult_SeqSBAIJ_5; 1601 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1602 break; 1603 case 6: 1604 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1605 B->ops->solve = MatSolve_SeqSBAIJ_6; 1606 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1607 B->ops->mult = MatMult_SeqSBAIJ_6; 1608 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1609 break; 1610 case 7: 1611 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1612 B->ops->solve = MatSolve_SeqSBAIJ_7; 1613 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1614 B->ops->mult = MatMult_SeqSBAIJ_7; 1615 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1616 break; 1617 } 1618 } 1619 1620 b->mbs = mbs; 1621 b->nbs = mbs; 1622 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1623 if (!nnz) { 1624 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1625 else if (nz <= 0) nz = 1; 1626 for (i=0; i<mbs; i++) { 1627 b->imax[i] = nz; 1628 } 1629 nz = nz*mbs; /* total nz */ 1630 } else { 1631 nz = 0; 1632 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1633 } 1634 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1635 s_nz = nz; 1636 1637 /* allocate the matrix space */ 1638 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1639 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1640 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1641 b->j = (int*)(b->a + s_nz*bs2); 1642 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1643 b->i = b->j + s_nz; 1644 b->singlemalloc = PETSC_TRUE; 1645 1646 /* pointer to beginning of each row */ 1647 b->i[0] = 0; 1648 for (i=1; i<mbs+1; i++) { 1649 b->i[i] = b->i[i-1] + b->imax[i-1]; 1650 } 1651 1652 /* b->ilen will count nonzeros in each block row so far. */ 1653 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1654 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1655 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1656 1657 b->bs = bs; 1658 b->bs2 = bs2; 1659 b->s_nz = 0; 1660 b->s_maxnz = s_nz*bs2; 1661 1662 b->inew = 0; 1663 b->jnew = 0; 1664 b->anew = 0; 1665 b->a2anew = 0; 1666 b->permute = PETSC_FALSE; 1667 PetscFunctionReturn(0); 1668 } 1669 1670 1671 #undef __FUNCT__ 1672 #define __FUNCT__ "MatCreateSeqSBAIJ" 1673 /*@C 1674 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1675 compressed row) format. For good matrix assembly performance the 1676 user should preallocate the matrix storage by setting the parameter nz 1677 (or the array nnz). By setting these parameters accurately, performance 1678 during matrix assembly can be increased by more than a factor of 50. 1679 1680 Collective on MPI_Comm 1681 1682 Input Parameters: 1683 + comm - MPI communicator, set to PETSC_COMM_SELF 1684 . bs - size of block 1685 . m - number of rows, or number of columns 1686 . nz - number of block nonzeros per block row (same for all rows) 1687 - nnz - array containing the number of block nonzeros in the upper triangular plus 1688 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1689 1690 Output Parameter: 1691 . A - the symmetric matrix 1692 1693 Options Database Keys: 1694 . -mat_no_unroll - uses code that does not unroll the loops in the 1695 block calculations (much slower) 1696 . -mat_block_size - size of the blocks to use 1697 1698 Level: intermediate 1699 1700 Notes: 1701 1702 Specify the preallocated storage with either nz or nnz (not both). 1703 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1704 allocation. For additional details, see the users manual chapter on 1705 matrices. 1706 1707 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1708 @*/ 1709 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1710 { 1711 int ierr; 1712 1713 PetscFunctionBegin; 1714 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1715 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1716 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1722 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1723 { 1724 Mat C; 1725 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1726 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1727 1728 PetscFunctionBegin; 1729 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1730 1731 *B = 0; 1732 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1733 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1734 c = (Mat_SeqSBAIJ*)C->data; 1735 1736 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1737 C->preallocated = PETSC_TRUE; 1738 C->factor = A->factor; 1739 c->row = 0; 1740 c->icol = 0; 1741 c->saved_values = 0; 1742 c->keepzeroedrows = a->keepzeroedrows; 1743 C->assembled = PETSC_TRUE; 1744 1745 c->bs = a->bs; 1746 c->bs2 = a->bs2; 1747 c->mbs = a->mbs; 1748 c->nbs = a->nbs; 1749 1750 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1751 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1752 for (i=0; i<mbs; i++) { 1753 c->imax[i] = a->imax[i]; 1754 c->ilen[i] = a->ilen[i]; 1755 } 1756 1757 /* allocate the matrix space */ 1758 c->singlemalloc = PETSC_TRUE; 1759 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1760 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1761 c->j = (int*)(c->a + nz*bs2); 1762 c->i = c->j + nz; 1763 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1764 if (mbs > 0) { 1765 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1766 if (cpvalues == MAT_COPY_VALUES) { 1767 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1768 } else { 1769 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1770 } 1771 } 1772 1773 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1774 c->sorted = a->sorted; 1775 c->roworiented = a->roworiented; 1776 c->nonew = a->nonew; 1777 1778 if (a->diag) { 1779 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1780 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1781 for (i=0; i<mbs; i++) { 1782 c->diag[i] = a->diag[i]; 1783 } 1784 } else c->diag = 0; 1785 c->s_nz = a->s_nz; 1786 c->s_maxnz = a->s_maxnz; 1787 c->solve_work = 0; 1788 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1789 c->mult_work = 0; 1790 *B = C; 1791 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1792 PetscFunctionReturn(0); 1793 } 1794 1795 EXTERN_C_BEGIN 1796 #undef __FUNCT__ 1797 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1798 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1799 { 1800 Mat_SeqSBAIJ *a; 1801 Mat B; 1802 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1803 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1804 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1805 int *masked,nmask,tmp,bs2,ishift; 1806 PetscScalar *aa; 1807 MPI_Comm comm = ((PetscObject)viewer)->comm; 1808 #if defined(PETSC_HAVE_SPOOLES) 1809 PetscTruth flag; 1810 #endif 1811 1812 PetscFunctionBegin; 1813 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1814 bs2 = bs*bs; 1815 1816 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1817 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1818 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1819 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1820 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1821 M = header[1]; N = header[2]; nz = header[3]; 1822 1823 if (header[3] < 0) { 1824 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1825 } 1826 1827 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1828 1829 /* 1830 This code adds extra rows to make sure the number of rows is 1831 divisible by the blocksize 1832 */ 1833 mbs = M/bs; 1834 extra_rows = bs - M + bs*(mbs); 1835 if (extra_rows == bs) extra_rows = 0; 1836 else mbs++; 1837 if (extra_rows) { 1838 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1839 } 1840 1841 /* read in row lengths */ 1842 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1843 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1844 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1845 1846 /* read in column indices */ 1847 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1848 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1849 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1850 1851 /* loop over row lengths determining block row lengths */ 1852 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1853 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1854 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1855 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1856 masked = mask + mbs; 1857 rowcount = 0; nzcount = 0; 1858 for (i=0; i<mbs; i++) { 1859 nmask = 0; 1860 for (j=0; j<bs; j++) { 1861 kmax = rowlengths[rowcount]; 1862 for (k=0; k<kmax; k++) { 1863 tmp = jj[nzcount++]/bs; /* block col. index */ 1864 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1865 } 1866 rowcount++; 1867 } 1868 s_browlengths[i] += nmask; 1869 1870 /* zero out the mask elements we set */ 1871 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1872 } 1873 1874 /* create our matrix */ 1875 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr); 1876 B = *A; 1877 a = (Mat_SeqSBAIJ*)B->data; 1878 1879 /* set matrix "i" values */ 1880 a->i[0] = 0; 1881 for (i=1; i<= mbs; i++) { 1882 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1883 a->ilen[i-1] = s_browlengths[i-1]; 1884 } 1885 a->s_nz = a->i[mbs]; 1886 1887 /* read in nonzero values */ 1888 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1889 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1890 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1891 1892 /* set "a" and "j" values into matrix */ 1893 nzcount = 0; jcount = 0; 1894 for (i=0; i<mbs; i++) { 1895 nzcountb = nzcount; 1896 nmask = 0; 1897 for (j=0; j<bs; j++) { 1898 kmax = rowlengths[i*bs+j]; 1899 for (k=0; k<kmax; k++) { 1900 tmp = jj[nzcount++]/bs; /* block col. index */ 1901 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1902 } 1903 } 1904 /* sort the masked values */ 1905 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1906 1907 /* set "j" values into matrix */ 1908 maskcount = 1; 1909 for (j=0; j<nmask; j++) { 1910 a->j[jcount++] = masked[j]; 1911 mask[masked[j]] = maskcount++; 1912 } 1913 1914 /* set "a" values into matrix */ 1915 ishift = bs2*a->i[i]; 1916 for (j=0; j<bs; j++) { 1917 kmax = rowlengths[i*bs+j]; 1918 for (k=0; k<kmax; k++) { 1919 tmp = jj[nzcountb]/bs ; /* block col. index */ 1920 if (tmp >= i){ 1921 block = mask[tmp] - 1; 1922 point = jj[nzcountb] - bs*tmp; 1923 idx = ishift + bs2*block + j + bs*point; 1924 a->a[idx] = aa[nzcountb]; 1925 } 1926 nzcountb++; 1927 } 1928 } 1929 /* zero out the mask elements we set */ 1930 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1931 } 1932 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1933 1934 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1935 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1936 ierr = PetscFree(aa);CHKERRQ(ierr); 1937 ierr = PetscFree(jj);CHKERRQ(ierr); 1938 ierr = PetscFree(mask);CHKERRQ(ierr); 1939 1940 B->assembled = PETSC_TRUE; 1941 #if defined(PETSC_HAVE_SPOOLES) 1942 ierr = PetscOptionsHasName(B->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 1943 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(B);CHKERRQ(ierr); } 1944 #endif 1945 ierr = MatView_Private(B);CHKERRQ(ierr); 1946 PetscFunctionReturn(0); 1947 } 1948 EXTERN_C_END 1949 1950 #undef __FUNCT__ 1951 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1952 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1953 { 1954 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1955 MatScalar *aa=a->a,*v,*v1; 1956 PetscScalar *x,*b,*t,sum,d; 1957 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1958 int nz,nz1,*vj,*vj1,i; 1959 1960 PetscFunctionBegin; 1961 its = its*lits; 1962 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1963 1964 if (bs > 1) 1965 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1966 1967 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1968 if (xx != bb) { 1969 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1970 } else { 1971 b = x; 1972 } 1973 1974 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1975 1976 if (flag & SOR_ZERO_INITIAL_GUESS) { 1977 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1978 for (i=0; i<m; i++) 1979 t[i] = b[i]; 1980 1981 for (i=0; i<m; i++){ 1982 d = *(aa + ai[i]); /* diag[i] */ 1983 v = aa + ai[i] + 1; 1984 vj = aj + ai[i] + 1; 1985 nz = ai[i+1] - ai[i] - 1; 1986 x[i] = omega*t[i]/d; 1987 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1988 PetscLogFlops(2*nz-1); 1989 } 1990 } 1991 1992 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1993 for (i=0; i<m; i++) 1994 t[i] = b[i]; 1995 1996 for (i=0; i<m-1; i++){ /* update rhs */ 1997 v = aa + ai[i] + 1; 1998 vj = aj + ai[i] + 1; 1999 nz = ai[i+1] - ai[i] - 1; 2000 while (nz--) t[*vj++] -= x[i]*(*v++); 2001 PetscLogFlops(2*nz-1); 2002 } 2003 for (i=m-1; i>=0; i--){ 2004 d = *(aa + ai[i]); 2005 v = aa + ai[i] + 1; 2006 vj = aj + ai[i] + 1; 2007 nz = ai[i+1] - ai[i] - 1; 2008 sum = t[i]; 2009 while (nz--) sum -= x[*vj++]*(*v++); 2010 PetscLogFlops(2*nz-1); 2011 x[i] = (1-omega)*x[i] + omega*sum/d; 2012 } 2013 } 2014 its--; 2015 } 2016 2017 while (its--) { 2018 /* 2019 forward sweep: 2020 for i=0,...,m-1: 2021 sum[i] = (b[i] - U(i,:)x )/d[i]; 2022 x[i] = (1-omega)x[i] + omega*sum[i]; 2023 b = b - x[i]*U^T(i,:); 2024 2025 */ 2026 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2027 for (i=0; i<m; i++) 2028 t[i] = b[i]; 2029 2030 for (i=0; i<m; i++){ 2031 d = *(aa + ai[i]); /* diag[i] */ 2032 v = aa + ai[i] + 1; v1=v; 2033 vj = aj + ai[i] + 1; vj1=vj; 2034 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2035 sum = t[i]; 2036 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2037 x[i] = (1-omega)*x[i] + omega*sum/d; 2038 while (nz--) t[*vj++] -= x[i]*(*v++); 2039 PetscLogFlops(4*nz-2); 2040 } 2041 } 2042 2043 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2044 /* 2045 backward sweep: 2046 b = b - x[i]*U^T(i,:), i=0,...,n-2 2047 for i=m-1,...,0: 2048 sum[i] = (b[i] - U(i,:)x )/d[i]; 2049 x[i] = (1-omega)x[i] + omega*sum[i]; 2050 */ 2051 for (i=0; i<m; i++) 2052 t[i] = b[i]; 2053 2054 for (i=0; i<m-1; i++){ /* update rhs */ 2055 v = aa + ai[i] + 1; 2056 vj = aj + ai[i] + 1; 2057 nz = ai[i+1] - ai[i] - 1; 2058 while (nz--) t[*vj++] -= x[i]*(*v++); 2059 PetscLogFlops(2*nz-1); 2060 } 2061 for (i=m-1; i>=0; i--){ 2062 d = *(aa + ai[i]); 2063 v = aa + ai[i] + 1; 2064 vj = aj + ai[i] + 1; 2065 nz = ai[i+1] - ai[i] - 1; 2066 sum = t[i]; 2067 while (nz--) sum -= x[*vj++]*(*v++); 2068 PetscLogFlops(2*nz-1); 2069 x[i] = (1-omega)*x[i] + omega*sum/d; 2070 } 2071 } 2072 } 2073 2074 ierr = PetscFree(t); CHKERRQ(ierr); 2075 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2076 if (bb != xx) { 2077 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2078 } 2079 2080 PetscFunctionReturn(0); 2081 } 2082 2083 2084 2085 2086 2087 2088