1 /*$Id: baij.c,v 1.187 1999/10/24 14:02:28 bsmith Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "sys.h" 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNC__ 18 #define __FUNC__ "MatMissingDiag_SeqBAIJ" 19 int MatMissingDiag_SeqBAIJ(Mat A) 20 { 21 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 22 int *diag = a->diag, *jj = a->j,i; 23 24 PetscFunctionBegin; 25 for ( i=0; i<a->mbs; i++ ) { 26 if (jj[diag[i]] != i) { 27 SETERRQ1(1,1,"Matrix is missing diagonal number %d",i); 28 } 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNC__ 34 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 35 int MatMarkDiag_SeqBAIJ(Mat A) 36 { 37 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 38 int i,j, *diag, m = a->mbs; 39 40 PetscFunctionBegin; 41 diag = (int *) PetscMalloc( (m+1)*sizeof(int));CHKPTRQ(diag); 42 PLogObjectMemory(A,(m+1)*sizeof(int)); 43 for ( i=0; i<m; i++ ) { 44 diag[i] = a->i[i+1]; 45 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 46 if (a->j[j] == i) { 47 diag[i] = j; 48 break; 49 } 50 } 51 } 52 a->diag = diag; 53 PetscFunctionReturn(0); 54 } 55 56 57 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 58 59 #undef __FUNC__ 60 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 61 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 62 PetscTruth *done) 63 { 64 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 65 int ierr, n = a->mbs,i; 66 67 PetscFunctionBegin; 68 *nn = n; 69 if (!ia) PetscFunctionReturn(0); 70 if (symmetric) { 71 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 72 } else if (oshift == 1) { 73 /* temporarily add 1 to i and j indices */ 74 int nz = a->i[n] + 1; 75 for ( i=0; i<nz; i++ ) a->j[i]++; 76 for ( i=0; i<n+1; i++ ) a->i[i]++; 77 *ia = a->i; *ja = a->j; 78 } else { 79 *ia = a->i; *ja = a->j; 80 } 81 82 PetscFunctionReturn(0); 83 } 84 85 #undef __FUNC__ 86 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 87 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 88 PetscTruth *done) 89 { 90 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 91 int i,n = a->mbs,ierr; 92 93 PetscFunctionBegin; 94 if (!ia) PetscFunctionReturn(0); 95 if (symmetric) { 96 ierr = PetscFree(*ia);CHKERRQ(ierr); 97 ierr = PetscFree(*ja);CHKERRQ(ierr); 98 } else if (oshift == 1) { 99 int nz = a->i[n]; 100 for ( i=0; i<nz; i++ ) a->j[i]--; 101 for ( i=0; i<n+1; i++ ) a->i[i]--; 102 } 103 PetscFunctionReturn(0); 104 } 105 106 #undef __FUNC__ 107 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 108 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 109 { 110 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 111 112 PetscFunctionBegin; 113 *bs = baij->bs; 114 PetscFunctionReturn(0); 115 } 116 117 118 #undef __FUNC__ 119 #define __FUNC__ "MatDestroy_SeqBAIJ" 120 int MatDestroy_SeqBAIJ(Mat A) 121 { 122 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 123 int ierr; 124 125 PetscFunctionBegin; 126 if (A->mapping) { 127 ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr); 128 } 129 if (A->bmapping) { 130 ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr); 131 } 132 if (A->rmap) { 133 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 134 } 135 if (A->cmap) { 136 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 137 } 138 #if defined(PETSC_USE_LOG) 139 PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 140 #endif 141 ierr = PetscFree(a->a);CHKERRQ(ierr); 142 if (!a->singlemalloc) { 143 ierr = PetscFree(a->i);CHKERRQ(ierr); 144 ierr = PetscFree(a->j);CHKERRQ(ierr); 145 } 146 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 147 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 148 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 149 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 150 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 151 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 152 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 153 ierr = PetscFree(a);CHKERRQ(ierr); 154 PLogObjectDestroy(A); 155 PetscHeaderDestroy(A); 156 PetscFunctionReturn(0); 157 } 158 159 #undef __FUNC__ 160 #define __FUNC__ "MatSetOption_SeqBAIJ" 161 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 162 { 163 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 164 165 PetscFunctionBegin; 166 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 167 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 168 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 169 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 170 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 171 else if (op == MAT_NEW_NONZERO_LOCATION_ERR) a->nonew = -1; 172 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR) a->nonew = -2; 173 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 174 else if (op == MAT_ROWS_SORTED || 175 op == MAT_ROWS_UNSORTED || 176 op == MAT_SYMMETRIC || 177 op == MAT_STRUCTURALLY_SYMMETRIC || 178 op == MAT_YES_NEW_DIAGONALS || 179 op == MAT_IGNORE_OFF_PROC_ENTRIES || 180 op == MAT_USE_HASH_TABLE) { 181 PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 182 } else if (op == MAT_NO_NEW_DIAGONALS) { 183 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 184 } else { 185 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 186 } 187 PetscFunctionReturn(0); 188 } 189 190 191 #undef __FUNC__ 192 #define __FUNC__ "MatGetSize_SeqBAIJ" 193 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 194 { 195 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 196 197 PetscFunctionBegin; 198 if (m) *m = a->m; 199 if (n) *n = a->n; 200 PetscFunctionReturn(0); 201 } 202 203 #undef __FUNC__ 204 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 205 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 206 { 207 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 208 209 PetscFunctionBegin; 210 *m = 0; *n = a->m; 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNC__ 215 #define __FUNC__ "MatGetRow_SeqBAIJ" 216 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 219 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 220 MatScalar *aa,*aa_i; 221 Scalar *v_i; 222 223 PetscFunctionBegin; 224 bs = a->bs; 225 ai = a->i; 226 aj = a->j; 227 aa = a->a; 228 bs2 = a->bs2; 229 230 if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range"); 231 232 bn = row/bs; /* Block number */ 233 bp = row % bs; /* Block Position */ 234 M = ai[bn+1] - ai[bn]; 235 *nz = bs*M; 236 237 if (v) { 238 *v = 0; 239 if (*nz) { 240 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) );CHKPTRQ(*v); 241 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 242 v_i = *v + i*bs; 243 aa_i = aa + bs2*(ai[bn] + i); 244 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 245 } 246 } 247 } 248 249 if (idx) { 250 *idx = 0; 251 if (*nz) { 252 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) );CHKPTRQ(*idx); 253 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 254 idx_i = *idx + i*bs; 255 itmp = bs*aj[ai[bn] + i]; 256 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 257 } 258 } 259 } 260 PetscFunctionReturn(0); 261 } 262 263 #undef __FUNC__ 264 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 265 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 266 { 267 int ierr; 268 269 PetscFunctionBegin; 270 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 271 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 272 PetscFunctionReturn(0); 273 } 274 275 #undef __FUNC__ 276 #define __FUNC__ "MatTranspose_SeqBAIJ" 277 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 278 { 279 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 280 Mat C; 281 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 282 int *rows,*cols,bs2=a->bs2; 283 MatScalar *array = a->a; 284 285 PetscFunctionBegin; 286 if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place"); 287 col = (int *) PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col); 288 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 289 290 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 291 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 292 ierr = PetscFree(col);CHKERRQ(ierr); 293 rows = (int *) PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows); 294 cols = rows + bs; 295 for ( i=0; i<mbs; i++ ) { 296 cols[0] = i*bs; 297 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 298 len = ai[i+1] - ai[i]; 299 for ( j=0; j<len; j++ ) { 300 rows[0] = (*aj++)*bs; 301 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 302 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 303 array += bs2; 304 } 305 } 306 ierr = PetscFree(rows);CHKERRQ(ierr); 307 308 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 309 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 310 311 if (B != PETSC_NULL) { 312 *B = C; 313 } else { 314 PetscOps *Abops; 315 MatOps Aops; 316 317 /* This isn't really an in-place transpose */ 318 ierr = PetscFree(a->a);CHKERRQ(ierr); 319 if (!a->singlemalloc) { 320 ierr = PetscFree(a->i);CHKERRQ(ierr); 321 ierr = PetscFree(a->j);CHKERRQ(ierr); 322 } 323 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 324 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 325 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 326 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 327 ierr = PetscFree(a);CHKERRQ(ierr); 328 329 330 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 331 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 332 333 /* 334 This is horrible, horrible code. We need to keep the 335 A pointers for the bops and ops but copy everything 336 else from C. 337 */ 338 Abops = A->bops; 339 Aops = A->ops; 340 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 341 A->bops = Abops; 342 A->ops = Aops; 343 A->qlist = 0; 344 345 PetscHeaderDestroy(C); 346 } 347 PetscFunctionReturn(0); 348 } 349 350 #undef __FUNC__ 351 #define __FUNC__ "MatView_SeqBAIJ_Binary" 352 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 353 { 354 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 355 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 356 Scalar *aa; 357 FILE *file; 358 359 PetscFunctionBegin; 360 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 361 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 362 col_lens[0] = MAT_COOKIE; 363 364 col_lens[1] = a->m; 365 col_lens[2] = a->n; 366 col_lens[3] = a->nz*bs2; 367 368 /* store lengths of each row and write (including header) to file */ 369 count = 0; 370 for ( i=0; i<a->mbs; i++ ) { 371 for ( j=0; j<bs; j++ ) { 372 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 373 } 374 } 375 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr); 376 ierr = PetscFree(col_lens);CHKERRQ(ierr); 377 378 /* store column indices (zero start index) */ 379 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) );CHKPTRQ(jj); 380 count = 0; 381 for ( i=0; i<a->mbs; i++ ) { 382 for ( j=0; j<bs; j++ ) { 383 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 384 for ( l=0; l<bs; l++ ) { 385 jj[count++] = bs*a->j[k] + l; 386 } 387 } 388 } 389 } 390 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 391 ierr = PetscFree(jj);CHKERRQ(ierr); 392 393 /* store nonzero values */ 394 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa); 395 count = 0; 396 for ( i=0; i<a->mbs; i++ ) { 397 for ( j=0; j<bs; j++ ) { 398 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 399 for ( l=0; l<bs; l++ ) { 400 aa[count++] = a->a[bs2*k + l*bs + j]; 401 } 402 } 403 } 404 } 405 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 406 ierr = PetscFree(aa);CHKERRQ(ierr); 407 408 ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 409 if (file) { 410 fprintf(file,"-matload_block_size %d\n",a->bs); 411 } 412 PetscFunctionReturn(0); 413 } 414 415 #undef __FUNC__ 416 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 417 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 418 { 419 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 420 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 421 char *outputname; 422 423 PetscFunctionBegin; 424 ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr); 425 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 426 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 427 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 428 } else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 429 SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported"); 430 } else if (format == VIEWER_FORMAT_ASCII_COMMON) { 431 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 432 for ( i=0; i<a->mbs; i++ ) { 433 for ( j=0; j<bs; j++ ) { 434 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 435 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 436 for ( l=0; l<bs; l++ ) { 437 #if defined(PETSC_USE_COMPLEX) 438 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 439 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 440 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 441 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 442 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 443 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 444 } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 445 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 446 } 447 #else 448 if (a->a[bs2*k + l*bs + j] != 0.0) { 449 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 450 } 451 #endif 452 } 453 } 454 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 455 } 456 } 457 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 458 } else { 459 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 460 for ( i=0; i<a->mbs; i++ ) { 461 for ( j=0; j<bs; j++ ) { 462 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 463 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 464 for ( l=0; l<bs; l++ ) { 465 #if defined(PETSC_USE_COMPLEX) 466 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) { 467 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 468 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 469 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) { 470 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 471 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 472 } else { 473 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 474 } 475 #else 476 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 477 #endif 478 } 479 } 480 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 481 } 482 } 483 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 484 } 485 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 486 PetscFunctionReturn(0); 487 } 488 489 #undef __FUNC__ 490 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom" 491 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa) 492 { 493 Mat A = (Mat) Aa; 494 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 495 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 496 double xl,yl,xr,yr,x_l,x_r,y_l,y_r; 497 MatScalar *aa; 498 MPI_Comm comm; 499 Viewer viewer; 500 501 PetscFunctionBegin; 502 /* 503 This is nasty. If this is called from an originally parallel matrix 504 then all processes call this, but only the first has the matrix so the 505 rest should return immediately. 506 */ 507 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 508 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 509 if (rank) PetscFunctionReturn(0); 510 511 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr); 512 513 ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 514 515 /* loop over matrix elements drawing boxes */ 516 color = DRAW_BLUE; 517 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 518 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 519 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 520 x_l = a->j[j]*bs; x_r = x_l + 1.0; 521 aa = a->a + j*bs2; 522 for ( k=0; k<bs; k++ ) { 523 for ( l=0; l<bs; l++ ) { 524 if (PetscReal(*aa++) >= 0.) continue; 525 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 526 } 527 } 528 } 529 } 530 color = DRAW_CYAN; 531 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 532 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 533 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 534 x_l = a->j[j]*bs; x_r = x_l + 1.0; 535 aa = a->a + j*bs2; 536 for ( k=0; k<bs; k++ ) { 537 for ( l=0; l<bs; l++ ) { 538 if (PetscReal(*aa++) != 0.) continue; 539 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 540 } 541 } 542 } 543 } 544 545 color = DRAW_RED; 546 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 547 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 548 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 549 x_l = a->j[j]*bs; x_r = x_l + 1.0; 550 aa = a->a + j*bs2; 551 for ( k=0; k<bs; k++ ) { 552 for ( l=0; l<bs; l++ ) { 553 if (PetscReal(*aa++) <= 0.) continue; 554 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 555 } 556 } 557 } 558 } 559 PetscFunctionReturn(0); 560 } 561 562 #undef __FUNC__ 563 #define __FUNC__ "MatView_SeqBAIJ_Draw" 564 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 565 { 566 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 567 int ierr; 568 double xl,yl,xr,yr,w,h; 569 Draw draw; 570 PetscTruth isnull; 571 572 PetscFunctionBegin; 573 574 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 575 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 576 577 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 578 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 579 xr += w; yr += h; xl = -w; yl = -h; 580 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 581 ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 582 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 583 PetscFunctionReturn(0); 584 } 585 586 #undef __FUNC__ 587 #define __FUNC__ "MatView_SeqBAIJ" 588 int MatView_SeqBAIJ(Mat A,Viewer viewer) 589 { 590 int ierr; 591 PetscTruth issocket,isascii,isbinary,isdraw; 592 593 PetscFunctionBegin; 594 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 595 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 596 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 597 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 598 if (issocket) { 599 SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported"); 600 } else if (isascii){ 601 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 602 } else if (isbinary) { 603 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 604 } else if (isdraw) { 605 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 606 } else { 607 SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 608 } 609 PetscFunctionReturn(0); 610 } 611 612 613 #undef __FUNC__ 614 #define __FUNC__ "MatGetValues_SeqBAIJ" 615 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 616 { 617 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 618 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 619 int *ai = a->i, *ailen = a->ilen; 620 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 621 MatScalar *ap, *aa = a->a, zero = 0.0; 622 623 PetscFunctionBegin; 624 for ( k=0; k<m; k++ ) { /* loop over rows */ 625 row = im[k]; brow = row/bs; 626 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 627 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 628 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 629 nrow = ailen[brow]; 630 for ( l=0; l<n; l++ ) { /* loop over columns */ 631 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 632 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 633 col = in[l] ; 634 bcol = col/bs; 635 cidx = col%bs; 636 ridx = row%bs; 637 high = nrow; 638 low = 0; /* assume unsorted */ 639 while (high-low > 5) { 640 t = (low+high)/2; 641 if (rp[t] > bcol) high = t; 642 else low = t; 643 } 644 for ( i=low; i<high; i++ ) { 645 if (rp[i] > bcol) break; 646 if (rp[i] == bcol) { 647 *v++ = ap[bs2*i+bs*cidx+ridx]; 648 goto finished; 649 } 650 } 651 *v++ = zero; 652 finished:; 653 } 654 } 655 PetscFunctionReturn(0); 656 } 657 658 659 #undef __FUNC__ 660 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 661 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 662 { 663 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 664 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 665 int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 666 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 667 Scalar *value = v; 668 MatScalar *ap,*aa=a->a,*bap; 669 670 PetscFunctionBegin; 671 if (roworiented) { 672 stepval = (n-1)*bs; 673 } else { 674 stepval = (m-1)*bs; 675 } 676 for ( k=0; k<m; k++ ) { /* loop over added rows */ 677 row = im[k]; 678 if (row < 0) continue; 679 #if defined(PETSC_USE_BOPT_g) 680 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 681 #endif 682 rp = aj + ai[row]; 683 ap = aa + bs2*ai[row]; 684 rmax = imax[row]; 685 nrow = ailen[row]; 686 low = 0; 687 for ( l=0; l<n; l++ ) { /* loop over added columns */ 688 if (in[l] < 0) continue; 689 #if defined(PETSC_USE_BOPT_g) 690 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 691 #endif 692 col = in[l]; 693 if (roworiented) { 694 value = v + k*(stepval+bs)*bs + l*bs; 695 } else { 696 value = v + l*(stepval+bs)*bs + k*bs; 697 } 698 if (!sorted) low = 0; high = nrow; 699 while (high-low > 7) { 700 t = (low+high)/2; 701 if (rp[t] > col) high = t; 702 else low = t; 703 } 704 for ( i=low; i<high; i++ ) { 705 if (rp[i] > col) break; 706 if (rp[i] == col) { 707 bap = ap + bs2*i; 708 if (roworiented) { 709 if (is == ADD_VALUES) { 710 for ( ii=0; ii<bs; ii++,value+=stepval ) { 711 for (jj=ii; jj<bs2; jj+=bs ) { 712 bap[jj] += *value++; 713 } 714 } 715 } else { 716 for ( ii=0; ii<bs; ii++,value+=stepval ) { 717 for (jj=ii; jj<bs2; jj+=bs ) { 718 bap[jj] = *value++; 719 } 720 } 721 } 722 } else { 723 if (is == ADD_VALUES) { 724 for ( ii=0; ii<bs; ii++,value+=stepval ) { 725 for (jj=0; jj<bs; jj++ ) { 726 *bap++ += *value++; 727 } 728 } 729 } else { 730 for ( ii=0; ii<bs; ii++,value+=stepval ) { 731 for (jj=0; jj<bs; jj++ ) { 732 *bap++ = *value++; 733 } 734 } 735 } 736 } 737 goto noinsert2; 738 } 739 } 740 if (nonew == 1) goto noinsert2; 741 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 742 if (nrow >= rmax) { 743 /* there is no extra room in row, therefore enlarge */ 744 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 745 MatScalar *new_a; 746 747 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 748 749 /* malloc new storage space */ 750 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 751 new_a = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a); 752 new_j = (int *) (new_a + bs2*new_nz); 753 new_i = new_j + new_nz; 754 755 /* copy over old data into new slots */ 756 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 757 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 758 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 759 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 760 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 761 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 762 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 763 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 764 /* free up old matrix storage */ 765 ierr = PetscFree(a->a);CHKERRQ(ierr); 766 if (!a->singlemalloc) { 767 ierr = PetscFree(a->i);CHKERRQ(ierr); 768 ierr = PetscFree(a->j);CHKERRQ(ierr); 769 } 770 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 771 a->singlemalloc = 1; 772 773 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 774 rmax = imax[row] = imax[row] + CHUNKSIZE; 775 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 776 a->maxnz += bs2*CHUNKSIZE; 777 a->reallocs++; 778 a->nz++; 779 } 780 N = nrow++ - 1; 781 /* shift up all the later entries in this row */ 782 for ( ii=N; ii>=i; ii-- ) { 783 rp[ii+1] = rp[ii]; 784 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 785 } 786 if (N >= i) { 787 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 788 } 789 rp[i] = col; 790 bap = ap + bs2*i; 791 if (roworiented) { 792 for ( ii=0; ii<bs; ii++,value+=stepval) { 793 for (jj=ii; jj<bs2; jj+=bs ) { 794 bap[jj] = *value++; 795 } 796 } 797 } else { 798 for ( ii=0; ii<bs; ii++,value+=stepval ) { 799 for (jj=0; jj<bs; jj++ ) { 800 *bap++ = *value++; 801 } 802 } 803 } 804 noinsert2:; 805 low = i; 806 } 807 ailen[row] = nrow; 808 } 809 PetscFunctionReturn(0); 810 } 811 812 813 #undef __FUNC__ 814 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 815 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 816 { 817 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 818 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 819 int m = a->m,*ip, N, *ailen = a->ilen; 820 int mbs = a->mbs, bs2 = a->bs2,rmax = 0,ierr; 821 MatScalar *aa = a->a, *ap; 822 823 PetscFunctionBegin; 824 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 825 826 if (m) rmax = ailen[0]; 827 for ( i=1; i<mbs; i++ ) { 828 /* move each row back by the amount of empty slots (fshift) before it*/ 829 fshift += imax[i-1] - ailen[i-1]; 830 rmax = PetscMax(rmax,ailen[i]); 831 if (fshift) { 832 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 833 N = ailen[i]; 834 for ( j=0; j<N; j++ ) { 835 ip[j-fshift] = ip[j]; 836 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 837 } 838 } 839 ai[i] = ai[i-1] + ailen[i-1]; 840 } 841 if (mbs) { 842 fshift += imax[mbs-1] - ailen[mbs-1]; 843 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 844 } 845 /* reset ilen and imax for each row */ 846 for ( i=0; i<mbs; i++ ) { 847 ailen[i] = imax[i] = ai[i+1] - ai[i]; 848 } 849 a->nz = ai[mbs]; 850 851 /* diagonals may have moved, so kill the diagonal pointers */ 852 if (fshift && a->diag) { 853 ierr = PetscFree(a->diag);CHKERRQ(ierr); 854 PLogObjectMemory(A,-(m+1)*sizeof(int)); 855 a->diag = 0; 856 } 857 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 858 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 859 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 860 a->reallocs); 861 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 862 a->reallocs = 0; 863 A->info.nz_unneeded = (double)fshift*bs2; 864 865 PetscFunctionReturn(0); 866 } 867 868 869 870 /* 871 This function returns an array of flags which indicate the locations of contiguous 872 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 873 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 874 Assume: sizes should be long enough to hold all the values. 875 */ 876 #undef __FUNC__ 877 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks" 878 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 879 { 880 int i,j,k,row; 881 PetscTruth flg; 882 883 PetscFunctionBegin; 884 for ( i=0,j=0; i<n; j++ ) { 885 row = idx[i]; 886 if (row%bs!=0) { /* Not the begining of a block */ 887 sizes[j] = 1; 888 i++; 889 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 890 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 891 i++; 892 } else { /* Begining of the block, so check if the complete block exists */ 893 flg = PETSC_TRUE; 894 for ( k=1; k<bs; k++ ) { 895 if (row+k != idx[i+k]) { /* break in the block */ 896 flg = PETSC_FALSE; 897 break; 898 } 899 } 900 if (flg == PETSC_TRUE) { /* No break in the bs */ 901 sizes[j] = bs; 902 i+= bs; 903 } else { 904 sizes[j] = 1; 905 i++; 906 } 907 } 908 } 909 *bs_max = j; 910 PetscFunctionReturn(0); 911 } 912 913 #undef __FUNC__ 914 #define __FUNC__ "MatZeroRows_SeqBAIJ" 915 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 916 { 917 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 918 int ierr,i,j,k,count,is_n,*is_idx,*rows; 919 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 920 Scalar zero = 0.0; 921 MatScalar *aa; 922 923 PetscFunctionBegin; 924 /* Make a copy of the IS and sort it */ 925 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 926 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 927 928 /* allocate memory for rows,sizes */ 929 rows = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows); 930 sizes = rows + is_n; 931 932 /* initialize copy IS valurs to rows, and sort them */ 933 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 934 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 935 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 936 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 937 938 for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) { 939 row = rows[j]; 940 if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row); 941 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 942 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 943 if (sizes[i] == bs) { 944 if (diag) { 945 if (baij->ilen[row/bs] > 0) { 946 baij->ilen[row/bs] = 1; 947 baij->j[baij->i[row/bs]] = row/bs; 948 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 949 } 950 /* Now insert all the diagoanl values for this bs */ 951 for ( k=0; k<bs; k++ ) { 952 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 953 } 954 } else { /* (!diag) */ 955 baij->ilen[row/bs] = 0; 956 } /* end (!diag) */ 957 } else { /* (sizes[i] != bs) */ 958 #if defined (PETSC_USE_DEBUG) 959 if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1"); 960 #endif 961 for ( k=0; k<count; k++ ) { 962 aa[0] = zero; 963 aa+=bs; 964 } 965 if (diag) { 966 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 967 } 968 } 969 } 970 971 ierr = PetscFree(rows);CHKERRQ(ierr); 972 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } 975 976 #undef __FUNC__ 977 #define __FUNC__ "MatSetValues_SeqBAIJ" 978 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 979 { 980 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 981 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 982 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 983 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 984 int ridx,cidx,bs2=a->bs2,ierr; 985 MatScalar *ap,value,*aa=a->a,*bap; 986 987 PetscFunctionBegin; 988 for ( k=0; k<m; k++ ) { /* loop over added rows */ 989 row = im[k]; brow = row/bs; 990 if (row < 0) continue; 991 #if defined(PETSC_USE_BOPT_g) 992 if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m); 993 #endif 994 rp = aj + ai[brow]; 995 ap = aa + bs2*ai[brow]; 996 rmax = imax[brow]; 997 nrow = ailen[brow]; 998 low = 0; 999 for ( l=0; l<n; l++ ) { /* loop over added columns */ 1000 if (in[l] < 0) continue; 1001 #if defined(PETSC_USE_BOPT_g) 1002 if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n); 1003 #endif 1004 col = in[l]; bcol = col/bs; 1005 ridx = row % bs; cidx = col % bs; 1006 if (roworiented) { 1007 value = v[l + k*n]; 1008 } else { 1009 value = v[k + l*m]; 1010 } 1011 if (!sorted) low = 0; high = nrow; 1012 while (high-low > 7) { 1013 t = (low+high)/2; 1014 if (rp[t] > bcol) high = t; 1015 else low = t; 1016 } 1017 for ( i=low; i<high; i++ ) { 1018 if (rp[i] > bcol) break; 1019 if (rp[i] == bcol) { 1020 bap = ap + bs2*i + bs*cidx + ridx; 1021 if (is == ADD_VALUES) *bap += value; 1022 else *bap = value; 1023 goto noinsert1; 1024 } 1025 } 1026 if (nonew == 1) goto noinsert1; 1027 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1028 if (nrow >= rmax) { 1029 /* there is no extra room in row, therefore enlarge */ 1030 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1031 MatScalar *new_a; 1032 1033 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1034 1035 /* Malloc new storage space */ 1036 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1037 new_a = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a); 1038 new_j = (int *) (new_a + bs2*new_nz); 1039 new_i = new_j + new_nz; 1040 1041 /* copy over old data into new slots */ 1042 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 1043 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1044 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1045 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1046 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1047 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1048 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1049 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1050 /* free up old matrix storage */ 1051 ierr = PetscFree(a->a);CHKERRQ(ierr); 1052 if (!a->singlemalloc) { 1053 ierr = PetscFree(a->i);CHKERRQ(ierr); 1054 ierr = PetscFree(a->j);CHKERRQ(ierr); 1055 } 1056 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1057 a->singlemalloc = 1; 1058 1059 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1060 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1061 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1062 a->maxnz += bs2*CHUNKSIZE; 1063 a->reallocs++; 1064 a->nz++; 1065 } 1066 N = nrow++ - 1; 1067 /* shift up all the later entries in this row */ 1068 for ( ii=N; ii>=i; ii-- ) { 1069 rp[ii+1] = rp[ii]; 1070 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1071 } 1072 if (N>=i) { 1073 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1074 } 1075 rp[i] = bcol; 1076 ap[bs2*i + bs*cidx + ridx] = value; 1077 noinsert1:; 1078 low = i; 1079 } 1080 ailen[brow] = nrow; 1081 } 1082 PetscFunctionReturn(0); 1083 } 1084 1085 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1086 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1087 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1088 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1089 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1090 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec); 1091 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1092 extern int MatScale_SeqBAIJ(Scalar*,Mat); 1093 extern int MatNorm_SeqBAIJ(Mat,NormType,double *); 1094 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*); 1095 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1096 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1097 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1098 extern int MatZeroEntries_SeqBAIJ(Mat); 1099 1100 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1101 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1102 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1103 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1104 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1105 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1106 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 1107 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1108 extern int MatSolveTrans_SeqBAIJ_7(Mat,Vec,Vec); 1109 extern int MatSolveTrans_SeqBAIJ_6(Mat,Vec,Vec); 1110 extern int MatSolveTrans_SeqBAIJ_5(Mat,Vec,Vec); 1111 extern int MatSolveTrans_SeqBAIJ_4(Mat,Vec,Vec); 1112 extern int MatSolveTrans_SeqBAIJ_3(Mat,Vec,Vec); 1113 extern int MatSolveTrans_SeqBAIJ_2(Mat,Vec,Vec); 1114 extern int MatSolveTrans_SeqBAIJ_1(Mat,Vec,Vec); 1115 1116 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1117 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1118 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1119 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1120 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1121 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1122 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*); 1123 1124 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1125 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1126 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1127 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1128 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1129 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec); 1130 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1131 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1132 1133 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1134 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1135 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1136 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1137 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1138 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 1139 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1140 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1141 1142 #undef __FUNC__ 1143 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1144 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1145 { 1146 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1147 Mat outA; 1148 int ierr; 1149 PetscTruth row_identity, col_identity; 1150 1151 PetscFunctionBegin; 1152 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU"); 1153 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1154 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1155 if (!row_identity || !col_identity) { 1156 SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU"); 1157 } 1158 1159 outA = inA; 1160 inA->factor = FACTOR_LU; 1161 a->row = row; 1162 a->col = col; 1163 1164 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1165 ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr); 1166 PLogObjectParent(inA,a->icol); 1167 1168 if (!a->solve_work) { 1169 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1170 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1171 } 1172 1173 if (!a->diag) { 1174 ierr = MatMarkDiag_SeqBAIJ(inA);CHKERRQ(ierr); 1175 } 1176 /* 1177 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1178 for ILU(0) factorization with natural ordering 1179 */ 1180 switch (a->bs) { 1181 case 1: 1182 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_2_NaturalOrdering; 1183 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1184 case 2: 1185 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1186 inA->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1187 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_2_NaturalOrdering; 1188 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1189 break; 1190 case 3: 1191 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1192 inA->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1193 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_3_NaturalOrdering; 1194 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1195 break; 1196 case 4: 1197 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1198 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1199 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_4_NaturalOrdering; 1200 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1201 break; 1202 case 5: 1203 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1204 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1205 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_5_NaturalOrdering; 1206 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1207 break; 1208 case 6: 1209 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1210 inA->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1211 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_6_NaturalOrdering; 1212 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1213 break; 1214 case 7: 1215 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1216 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_7_NaturalOrdering; 1217 inA->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1218 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1219 break; 1220 } 1221 1222 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1223 1224 PetscFunctionReturn(0); 1225 } 1226 #undef __FUNC__ 1227 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1228 int MatPrintHelp_SeqBAIJ(Mat A) 1229 { 1230 static int called = 0; 1231 MPI_Comm comm = A->comm; 1232 int ierr; 1233 1234 PetscFunctionBegin; 1235 if (called) {PetscFunctionReturn(0);} else called = 1; 1236 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1237 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1238 PetscFunctionReturn(0); 1239 } 1240 1241 EXTERN_C_BEGIN 1242 #undef __FUNC__ 1243 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1244 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1245 { 1246 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1247 int i,nz,n; 1248 1249 PetscFunctionBegin; 1250 nz = baij->maxnz; 1251 n = baij->n; 1252 for (i=0; i<nz; i++) { 1253 baij->j[i] = indices[i]; 1254 } 1255 baij->nz = nz; 1256 for ( i=0; i<n; i++ ) { 1257 baij->ilen[i] = baij->imax[i]; 1258 } 1259 1260 PetscFunctionReturn(0); 1261 } 1262 EXTERN_C_END 1263 1264 #undef __FUNC__ 1265 #define __FUNC__ "MatSeqBAIJSetColumnIndices" 1266 /*@ 1267 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1268 in the matrix. 1269 1270 Input Parameters: 1271 + mat - the SeqBAIJ matrix 1272 - indices - the column indices 1273 1274 Level: advanced 1275 1276 Notes: 1277 This can be called if you have precomputed the nonzero structure of the 1278 matrix and want to provide it to the matrix object to improve the performance 1279 of the MatSetValues() operation. 1280 1281 You MUST have set the correct numbers of nonzeros per row in the call to 1282 MatCreateSeqBAIJ(). 1283 1284 MUST be called before any calls to MatSetValues(); 1285 1286 @*/ 1287 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1288 { 1289 int ierr,(*f)(Mat,int *); 1290 1291 PetscFunctionBegin; 1292 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1293 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr); 1294 if (f) { 1295 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1296 } else { 1297 SETERRQ(1,1,"Wrong type of matrix to set column indices"); 1298 } 1299 PetscFunctionReturn(0); 1300 } 1301 1302 /* -------------------------------------------------------------------*/ 1303 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1304 MatGetRow_SeqBAIJ, 1305 MatRestoreRow_SeqBAIJ, 1306 MatMult_SeqBAIJ_N, 1307 MatMultAdd_SeqBAIJ_N, 1308 MatMultTrans_SeqBAIJ, 1309 MatMultTransAdd_SeqBAIJ, 1310 MatSolve_SeqBAIJ_N, 1311 0, 1312 0, 1313 0, 1314 MatLUFactor_SeqBAIJ, 1315 0, 1316 0, 1317 MatTranspose_SeqBAIJ, 1318 MatGetInfo_SeqBAIJ, 1319 MatEqual_SeqBAIJ, 1320 MatGetDiagonal_SeqBAIJ, 1321 MatDiagonalScale_SeqBAIJ, 1322 MatNorm_SeqBAIJ, 1323 0, 1324 MatAssemblyEnd_SeqBAIJ, 1325 0, 1326 MatSetOption_SeqBAIJ, 1327 MatZeroEntries_SeqBAIJ, 1328 MatZeroRows_SeqBAIJ, 1329 MatLUFactorSymbolic_SeqBAIJ, 1330 MatLUFactorNumeric_SeqBAIJ_N, 1331 0, 1332 0, 1333 MatGetSize_SeqBAIJ, 1334 MatGetSize_SeqBAIJ, 1335 MatGetOwnershipRange_SeqBAIJ, 1336 MatILUFactorSymbolic_SeqBAIJ, 1337 0, 1338 0, 1339 0, 1340 MatDuplicate_SeqBAIJ, 1341 0, 1342 0, 1343 MatILUFactor_SeqBAIJ, 1344 0, 1345 0, 1346 MatGetSubMatrices_SeqBAIJ, 1347 MatIncreaseOverlap_SeqBAIJ, 1348 MatGetValues_SeqBAIJ, 1349 0, 1350 MatPrintHelp_SeqBAIJ, 1351 MatScale_SeqBAIJ, 1352 0, 1353 0, 1354 0, 1355 MatGetBlockSize_SeqBAIJ, 1356 MatGetRowIJ_SeqBAIJ, 1357 MatRestoreRowIJ_SeqBAIJ, 1358 0, 1359 0, 1360 0, 1361 0, 1362 0, 1363 0, 1364 MatSetValuesBlocked_SeqBAIJ, 1365 MatGetSubMatrix_SeqBAIJ, 1366 0, 1367 0, 1368 MatGetMaps_Petsc}; 1369 1370 EXTERN_C_BEGIN 1371 #undef __FUNC__ 1372 #define __FUNC__ "MatStoreValues_SeqBAIJ" 1373 int MatStoreValues_SeqBAIJ(Mat mat) 1374 { 1375 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1376 int nz = aij->i[aij->m]*aij->bs*aij->bs2; 1377 int ierr; 1378 1379 PetscFunctionBegin; 1380 if (aij->nonew != 1) { 1381 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1382 } 1383 1384 /* allocate space for values if not already there */ 1385 if (!aij->saved_values) { 1386 aij->saved_values = (Scalar *) PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values); 1387 } 1388 1389 /* copy values over */ 1390 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1391 PetscFunctionReturn(0); 1392 } 1393 EXTERN_C_END 1394 1395 EXTERN_C_BEGIN 1396 #undef __FUNC__ 1397 #define __FUNC__ "MatRetrieveValues_SeqBAIJ" 1398 int MatRetrieveValues_SeqBAIJ(Mat mat) 1399 { 1400 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1401 int nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr; 1402 1403 PetscFunctionBegin; 1404 if (aij->nonew != 1) { 1405 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1406 } 1407 if (!aij->saved_values) { 1408 SETERRQ(1,1,"Must call MatStoreValues(A);first"); 1409 } 1410 1411 /* copy values over */ 1412 ierr = PetscMemcpy(aij->a, aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 EXTERN_C_END 1416 1417 #undef __FUNC__ 1418 #define __FUNC__ "MatCreateSeqBAIJ" 1419 /*@C 1420 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1421 compressed row) format. For good matrix assembly performance the 1422 user should preallocate the matrix storage by setting the parameter nz 1423 (or the array nnz). By setting these parameters accurately, performance 1424 during matrix assembly can be increased by more than a factor of 50. 1425 1426 Collective on MPI_Comm 1427 1428 Input Parameters: 1429 + comm - MPI communicator, set to PETSC_COMM_SELF 1430 . bs - size of block 1431 . m - number of rows 1432 . n - number of columns 1433 . nz - number of block nonzeros per block row (same for all rows) 1434 - nnz - array containing the number of block nonzeros in the various block rows 1435 (possibly different for each block row) or PETSC_NULL 1436 1437 Output Parameter: 1438 . A - the matrix 1439 1440 Options Database Keys: 1441 . -mat_no_unroll - uses code that does not unroll the loops in the 1442 block calculations (much slower) 1443 . -mat_block_size - size of the blocks to use 1444 1445 Level: intermediate 1446 1447 Notes: 1448 The block AIJ format is fully compatible with standard Fortran 77 1449 storage. That is, the stored row and column indices can begin at 1450 either one (as in Fortran) or zero. See the users' manual for details. 1451 1452 Specify the preallocated storage with either nz or nnz (not both). 1453 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1454 allocation. For additional details, see the users manual chapter on 1455 matrices. 1456 1457 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1458 @*/ 1459 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1460 { 1461 Mat B; 1462 Mat_SeqBAIJ *b; 1463 int i,len,ierr,mbs,nbs,bs2,size; 1464 PetscTruth flg; 1465 1466 PetscFunctionBegin; 1467 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1468 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1"); 1469 1470 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1471 mbs = m/bs; 1472 nbs = n/bs; 1473 bs2 = bs*bs; 1474 1475 if (mbs*bs!=m || nbs*bs!=n) { 1476 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize"); 1477 } 1478 1479 if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz); 1480 if (nnz) { 1481 for (i=0; i<mbs; i++) { 1482 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1483 } 1484 } 1485 1486 *A = 0; 1487 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView); 1488 PLogObjectCreate(B); 1489 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b); 1490 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1491 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1492 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1493 if (!flg) { 1494 switch (bs) { 1495 case 1: 1496 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1497 B->ops->solve = MatSolve_SeqBAIJ_1; 1498 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_1; 1499 B->ops->mult = MatMult_SeqBAIJ_1; 1500 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1501 break; 1502 case 2: 1503 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1504 B->ops->solve = MatSolve_SeqBAIJ_2; 1505 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_2; 1506 B->ops->mult = MatMult_SeqBAIJ_2; 1507 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1508 break; 1509 case 3: 1510 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1511 B->ops->solve = MatSolve_SeqBAIJ_3; 1512 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_3; 1513 B->ops->mult = MatMult_SeqBAIJ_3; 1514 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1515 break; 1516 case 4: 1517 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1518 B->ops->solve = MatSolve_SeqBAIJ_4; 1519 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_4; 1520 B->ops->mult = MatMult_SeqBAIJ_4; 1521 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1522 break; 1523 case 5: 1524 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1525 B->ops->solve = MatSolve_SeqBAIJ_5; 1526 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_5; 1527 B->ops->mult = MatMult_SeqBAIJ_5; 1528 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1529 break; 1530 case 6: 1531 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1532 B->ops->solve = MatSolve_SeqBAIJ_6; 1533 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_6; 1534 B->ops->mult = MatMult_SeqBAIJ_6; 1535 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1536 break; 1537 case 7: 1538 B->ops->mult = MatMult_SeqBAIJ_7; 1539 B->ops->solve = MatSolve_SeqBAIJ_7; 1540 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_7; 1541 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1542 break; 1543 } 1544 } 1545 B->ops->destroy = MatDestroy_SeqBAIJ; 1546 B->ops->view = MatView_SeqBAIJ; 1547 B->factor = 0; 1548 B->lupivotthreshold = 1.0; 1549 B->mapping = 0; 1550 b->row = 0; 1551 b->col = 0; 1552 b->icol = 0; 1553 b->reallocs = 0; 1554 b->saved_values = 0; 1555 1556 b->m = m; B->m = m; B->M = m; 1557 b->n = n; B->n = n; B->N = n; 1558 1559 ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr); 1560 ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr); 1561 1562 b->mbs = mbs; 1563 b->nbs = nbs; 1564 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(b->imax); 1565 if (nnz == PETSC_NULL) { 1566 if (nz == PETSC_DEFAULT) nz = 5; 1567 else if (nz <= 0) nz = 1; 1568 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1569 nz = nz*mbs; 1570 } else { 1571 nz = 0; 1572 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1573 } 1574 1575 /* allocate the matrix space */ 1576 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int); 1577 b->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a); 1578 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1579 b->j = (int *) (b->a + nz*bs2); 1580 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1581 b->i = b->j + nz; 1582 b->singlemalloc = 1; 1583 1584 b->i[0] = 0; 1585 for (i=1; i<mbs+1; i++) { 1586 b->i[i] = b->i[i-1] + b->imax[i-1]; 1587 } 1588 1589 /* b->ilen will count nonzeros in each block row so far. */ 1590 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1591 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1592 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1593 1594 b->bs = bs; 1595 b->bs2 = bs2; 1596 b->mbs = mbs; 1597 b->nz = 0; 1598 b->maxnz = nz*bs2; 1599 b->sorted = 0; 1600 b->roworiented = 1; 1601 b->nonew = 0; 1602 b->diag = 0; 1603 b->solve_work = 0; 1604 b->mult_work = 0; 1605 b->spptr = 0; 1606 B->info.nz_unneeded = (double)b->maxnz; 1607 1608 *A = B; 1609 ierr = OptionsHasName(PETSC_NULL,"-help", &flg);CHKERRQ(ierr); 1610 if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); } 1611 1612 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1613 "MatStoreValues_SeqBAIJ", 1614 (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1615 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1616 "MatRetrieveValues_SeqBAIJ", 1617 (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1618 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1619 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1620 (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1621 PetscFunctionReturn(0); 1622 } 1623 1624 #undef __FUNC__ 1625 #define __FUNC__ "MatDuplicate_SeqBAIJ" 1626 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1627 { 1628 Mat C; 1629 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1630 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1631 1632 PetscFunctionBegin; 1633 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix"); 1634 1635 *B = 0; 1636 PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView); 1637 PLogObjectCreate(C); 1638 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c); 1639 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1640 C->ops->destroy = MatDestroy_SeqBAIJ; 1641 C->ops->view = MatView_SeqBAIJ; 1642 C->factor = A->factor; 1643 c->row = 0; 1644 c->col = 0; 1645 c->icol = 0; 1646 c->saved_values = 0; 1647 C->assembled = PETSC_TRUE; 1648 1649 c->m = C->m = a->m; 1650 c->n = C->n = a->n; 1651 C->M = a->m; 1652 C->N = a->n; 1653 1654 c->bs = a->bs; 1655 c->bs2 = a->bs2; 1656 c->mbs = a->mbs; 1657 c->nbs = a->nbs; 1658 1659 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax); 1660 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen); 1661 for ( i=0; i<mbs; i++ ) { 1662 c->imax[i] = a->imax[i]; 1663 c->ilen[i] = a->ilen[i]; 1664 } 1665 1666 /* allocate the matrix space */ 1667 c->singlemalloc = 1; 1668 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1669 c->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(c->a); 1670 c->j = (int *) (c->a + nz*bs2); 1671 c->i = c->j + nz; 1672 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1673 if (mbs > 0) { 1674 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1675 if (cpvalues == MAT_COPY_VALUES) { 1676 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1677 } else { 1678 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1679 } 1680 } 1681 1682 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1683 c->sorted = a->sorted; 1684 c->roworiented = a->roworiented; 1685 c->nonew = a->nonew; 1686 1687 if (a->diag) { 1688 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(c->diag); 1689 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1690 for ( i=0; i<mbs; i++ ) { 1691 c->diag[i] = a->diag[i]; 1692 } 1693 } else c->diag = 0; 1694 c->nz = a->nz; 1695 c->maxnz = a->maxnz; 1696 c->solve_work = 0; 1697 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1698 c->mult_work = 0; 1699 *B = C; 1700 ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 1704 #undef __FUNC__ 1705 #define __FUNC__ "MatLoad_SeqBAIJ" 1706 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1707 { 1708 Mat_SeqBAIJ *a; 1709 Mat B; 1710 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1711 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1712 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1713 int *masked, nmask,tmp,bs2,ishift; 1714 Scalar *aa; 1715 MPI_Comm comm = ((PetscObject) viewer)->comm; 1716 1717 PetscFunctionBegin; 1718 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1719 bs2 = bs*bs; 1720 1721 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1722 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor"); 1723 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1724 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1725 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object"); 1726 M = header[1]; N = header[2]; nz = header[3]; 1727 1728 if (header[3] < 0) { 1729 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ"); 1730 } 1731 1732 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 1733 1734 /* 1735 This code adds extra rows to make sure the number of rows is 1736 divisible by the blocksize 1737 */ 1738 mbs = M/bs; 1739 extra_rows = bs - M + bs*(mbs); 1740 if (extra_rows == bs) extra_rows = 0; 1741 else mbs++; 1742 if (extra_rows) { 1743 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1744 } 1745 1746 /* read in row lengths */ 1747 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1748 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1749 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1750 1751 /* read in column indices */ 1752 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) );CHKPTRQ(jj); 1753 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1754 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1755 1756 /* loop over row lengths determining block row lengths */ 1757 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1758 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1759 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) );CHKPTRQ(mask); 1760 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1761 masked = mask + mbs; 1762 rowcount = 0; nzcount = 0; 1763 for ( i=0; i<mbs; i++ ) { 1764 nmask = 0; 1765 for ( j=0; j<bs; j++ ) { 1766 kmax = rowlengths[rowcount]; 1767 for ( k=0; k<kmax; k++ ) { 1768 tmp = jj[nzcount++]/bs; 1769 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1770 } 1771 rowcount++; 1772 } 1773 browlengths[i] += nmask; 1774 /* zero out the mask elements we set */ 1775 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1776 } 1777 1778 /* create our matrix */ 1779 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1780 B = *A; 1781 a = (Mat_SeqBAIJ *) B->data; 1782 1783 /* set matrix "i" values */ 1784 a->i[0] = 0; 1785 for ( i=1; i<= mbs; i++ ) { 1786 a->i[i] = a->i[i-1] + browlengths[i-1]; 1787 a->ilen[i-1] = browlengths[i-1]; 1788 } 1789 a->nz = 0; 1790 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1791 1792 /* read in nonzero values */ 1793 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1794 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1795 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1796 1797 /* set "a" and "j" values into matrix */ 1798 nzcount = 0; jcount = 0; 1799 for ( i=0; i<mbs; i++ ) { 1800 nzcountb = nzcount; 1801 nmask = 0; 1802 for ( j=0; j<bs; j++ ) { 1803 kmax = rowlengths[i*bs+j]; 1804 for ( k=0; k<kmax; k++ ) { 1805 tmp = jj[nzcount++]/bs; 1806 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1807 } 1808 rowcount++; 1809 } 1810 /* sort the masked values */ 1811 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1812 1813 /* set "j" values into matrix */ 1814 maskcount = 1; 1815 for ( j=0; j<nmask; j++ ) { 1816 a->j[jcount++] = masked[j]; 1817 mask[masked[j]] = maskcount++; 1818 } 1819 /* set "a" values into matrix */ 1820 ishift = bs2*a->i[i]; 1821 for ( j=0; j<bs; j++ ) { 1822 kmax = rowlengths[i*bs+j]; 1823 for ( k=0; k<kmax; k++ ) { 1824 tmp = jj[nzcountb]/bs ; 1825 block = mask[tmp] - 1; 1826 point = jj[nzcountb] - bs*tmp; 1827 idx = ishift + bs2*block + j + bs*point; 1828 a->a[idx] = aa[nzcountb++]; 1829 } 1830 } 1831 /* zero out the mask elements we set */ 1832 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1833 } 1834 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix"); 1835 1836 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1837 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1838 ierr = PetscFree(aa);CHKERRQ(ierr); 1839 ierr = PetscFree(jj);CHKERRQ(ierr); 1840 ierr = PetscFree(mask);CHKERRQ(ierr); 1841 1842 B->assembled = PETSC_TRUE; 1843 1844 ierr = MatView_Private(B);CHKERRQ(ierr); 1845 PetscFunctionReturn(0); 1846 } 1847 1848 1849 1850