1 /*$Id: baij.c,v 1.189 1999/11/10 03:19:27 bsmith Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "sys.h" 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNC__ 18 #define __FUNC__ "MatMissingDiagonal_SeqBAIJ" 19 int MatMissingDiagonal_SeqBAIJ(Mat A) 20 { 21 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 22 int *diag = a->diag, *jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 26 for ( i=0; i<a->mbs; i++ ) { 27 if (jj[diag[i]] != i) { 28 SETERRQ1(1,1,"Matrix is missing diagonal number %d",i); 29 } 30 } 31 PetscFunctionReturn(0); 32 } 33 34 #undef __FUNC__ 35 #define __FUNC__ "MatMarkDiagonal_SeqBAIJ" 36 int MatMarkDiagonal_SeqBAIJ(Mat A) 37 { 38 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 39 int i,j, *diag, m = a->mbs; 40 41 PetscFunctionBegin; 42 diag = (int *) PetscMalloc( (m+1)*sizeof(int));CHKPTRQ(diag); 43 PLogObjectMemory(A,(m+1)*sizeof(int)); 44 for ( i=0; i<m; i++ ) { 45 diag[i] = a->i[i+1]; 46 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 47 if (a->j[j] == i) { 48 diag[i] = j; 49 break; 50 } 51 } 52 } 53 a->diag = diag; 54 PetscFunctionReturn(0); 55 } 56 57 58 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 59 60 #undef __FUNC__ 61 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 62 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 63 PetscTruth *done) 64 { 65 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 66 int ierr, n = a->mbs,i; 67 68 PetscFunctionBegin; 69 *nn = n; 70 if (!ia) PetscFunctionReturn(0); 71 if (symmetric) { 72 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 73 } else if (oshift == 1) { 74 /* temporarily add 1 to i and j indices */ 75 int nz = a->i[n] + 1; 76 for ( i=0; i<nz; i++ ) a->j[i]++; 77 for ( i=0; i<n+1; i++ ) a->i[i]++; 78 *ia = a->i; *ja = a->j; 79 } else { 80 *ia = a->i; *ja = a->j; 81 } 82 83 PetscFunctionReturn(0); 84 } 85 86 #undef __FUNC__ 87 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 88 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 89 PetscTruth *done) 90 { 91 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 92 int i,n = a->mbs,ierr; 93 94 PetscFunctionBegin; 95 if (!ia) PetscFunctionReturn(0); 96 if (symmetric) { 97 ierr = PetscFree(*ia);CHKERRQ(ierr); 98 ierr = PetscFree(*ja);CHKERRQ(ierr); 99 } else if (oshift == 1) { 100 int nz = a->i[n]; 101 for ( i=0; i<nz; i++ ) a->j[i]--; 102 for ( i=0; i<n+1; i++ ) a->i[i]--; 103 } 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNC__ 108 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 109 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 110 { 111 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 112 113 PetscFunctionBegin; 114 *bs = baij->bs; 115 PetscFunctionReturn(0); 116 } 117 118 119 #undef __FUNC__ 120 #define __FUNC__ "MatDestroy_SeqBAIJ" 121 int MatDestroy_SeqBAIJ(Mat A) 122 { 123 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 124 int ierr; 125 126 PetscFunctionBegin; 127 if (A->mapping) { 128 ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr); 129 } 130 if (A->bmapping) { 131 ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr); 132 } 133 if (A->rmap) { 134 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 135 } 136 if (A->cmap) { 137 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 138 } 139 #if defined(PETSC_USE_LOG) 140 PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 141 #endif 142 ierr = PetscFree(a->a);CHKERRQ(ierr); 143 if (!a->singlemalloc) { 144 ierr = PetscFree(a->i);CHKERRQ(ierr); 145 ierr = PetscFree(a->j);CHKERRQ(ierr); 146 } 147 if (a->row) { 148 ierr = ISDestroy(a->row);CHKERRQ(ierr); 149 } 150 if (a->col) { 151 ierr = ISDestroy(a->col);CHKERRQ(ierr); 152 } 153 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 154 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 155 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 156 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 157 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 158 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 159 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 160 ierr = PetscFree(a);CHKERRQ(ierr); 161 PLogObjectDestroy(A); 162 PetscHeaderDestroy(A); 163 PetscFunctionReturn(0); 164 } 165 166 #undef __FUNC__ 167 #define __FUNC__ "MatSetOption_SeqBAIJ" 168 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 169 { 170 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 171 172 PetscFunctionBegin; 173 if (op == MAT_ROW_ORIENTED) a->roworiented = PETSC_TRUE; 174 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = PETSC_FALSE; 175 else if (op == MAT_COLUMNS_SORTED) a->sorted = PETSC_TRUE; 176 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = PETSC_FALSE; 177 else if (op == MAT_KEEP_ZEROED_ROWS) a->keepzeroedrows = PETSC_TRUE; 178 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 179 else if (op == MAT_NEW_NONZERO_LOCATION_ERR) a->nonew = -1; 180 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR) a->nonew = -2; 181 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 182 else if (op == MAT_ROWS_SORTED || 183 op == MAT_ROWS_UNSORTED || 184 op == MAT_SYMMETRIC || 185 op == MAT_STRUCTURALLY_SYMMETRIC || 186 op == MAT_YES_NEW_DIAGONALS || 187 op == MAT_IGNORE_OFF_PROC_ENTRIES || 188 op == MAT_USE_HASH_TABLE) { 189 PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 190 } else if (op == MAT_NO_NEW_DIAGONALS) { 191 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 192 } else { 193 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 194 } 195 PetscFunctionReturn(0); 196 } 197 198 199 #undef __FUNC__ 200 #define __FUNC__ "MatGetSize_SeqBAIJ" 201 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 202 { 203 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 204 205 PetscFunctionBegin; 206 if (m) *m = a->m; 207 if (n) *n = a->n; 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNC__ 212 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 213 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 214 { 215 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 216 217 PetscFunctionBegin; 218 *m = 0; *n = a->m; 219 PetscFunctionReturn(0); 220 } 221 222 #undef __FUNC__ 223 #define __FUNC__ "MatGetRow_SeqBAIJ" 224 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 225 { 226 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 227 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 228 MatScalar *aa,*aa_i; 229 Scalar *v_i; 230 231 PetscFunctionBegin; 232 bs = a->bs; 233 ai = a->i; 234 aj = a->j; 235 aa = a->a; 236 bs2 = a->bs2; 237 238 if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range"); 239 240 bn = row/bs; /* Block number */ 241 bp = row % bs; /* Block Position */ 242 M = ai[bn+1] - ai[bn]; 243 *nz = bs*M; 244 245 if (v) { 246 *v = 0; 247 if (*nz) { 248 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) );CHKPTRQ(*v); 249 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 250 v_i = *v + i*bs; 251 aa_i = aa + bs2*(ai[bn] + i); 252 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 253 } 254 } 255 } 256 257 if (idx) { 258 *idx = 0; 259 if (*nz) { 260 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) );CHKPTRQ(*idx); 261 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 262 idx_i = *idx + i*bs; 263 itmp = bs*aj[ai[bn] + i]; 264 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNC__ 272 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 273 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 274 { 275 int ierr; 276 277 PetscFunctionBegin; 278 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 279 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNC__ 284 #define __FUNC__ "MatTranspose_SeqBAIJ" 285 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 286 { 287 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 288 Mat C; 289 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 290 int *rows,*cols,bs2=a->bs2; 291 MatScalar *array = a->a; 292 293 PetscFunctionBegin; 294 if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place"); 295 col = (int *) PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col); 296 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 297 298 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 299 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 300 ierr = PetscFree(col);CHKERRQ(ierr); 301 rows = (int *) PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows); 302 cols = rows + bs; 303 for ( i=0; i<mbs; i++ ) { 304 cols[0] = i*bs; 305 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 306 len = ai[i+1] - ai[i]; 307 for ( j=0; j<len; j++ ) { 308 rows[0] = (*aj++)*bs; 309 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 310 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 311 array += bs2; 312 } 313 } 314 ierr = PetscFree(rows);CHKERRQ(ierr); 315 316 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 317 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 318 319 if (B) { 320 *B = C; 321 } else { 322 PetscOps *Abops; 323 MatOps Aops; 324 325 /* This isn't really an in-place transpose */ 326 ierr = PetscFree(a->a);CHKERRQ(ierr); 327 if (!a->singlemalloc) { 328 ierr = PetscFree(a->i);CHKERRQ(ierr); 329 ierr = PetscFree(a->j);CHKERRQ(ierr); 330 } 331 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 332 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 333 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 334 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 335 ierr = PetscFree(a);CHKERRQ(ierr); 336 337 338 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 339 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 340 341 /* 342 This is horrible, horrible code. We need to keep the 343 A pointers for the bops and ops but copy everything 344 else from C. 345 */ 346 Abops = A->bops; 347 Aops = A->ops; 348 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 349 A->bops = Abops; 350 A->ops = Aops; 351 A->qlist = 0; 352 353 PetscHeaderDestroy(C); 354 } 355 PetscFunctionReturn(0); 356 } 357 358 #undef __FUNC__ 359 #define __FUNC__ "MatView_SeqBAIJ_Binary" 360 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 361 { 362 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 363 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 364 Scalar *aa; 365 FILE *file; 366 367 PetscFunctionBegin; 368 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 369 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 370 col_lens[0] = MAT_COOKIE; 371 372 col_lens[1] = a->m; 373 col_lens[2] = a->n; 374 col_lens[3] = a->nz*bs2; 375 376 /* store lengths of each row and write (including header) to file */ 377 count = 0; 378 for ( i=0; i<a->mbs; i++ ) { 379 for ( j=0; j<bs; j++ ) { 380 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 381 } 382 } 383 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr); 384 ierr = PetscFree(col_lens);CHKERRQ(ierr); 385 386 /* store column indices (zero start index) */ 387 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) );CHKPTRQ(jj); 388 count = 0; 389 for ( i=0; i<a->mbs; i++ ) { 390 for ( j=0; j<bs; j++ ) { 391 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 392 for ( l=0; l<bs; l++ ) { 393 jj[count++] = bs*a->j[k] + l; 394 } 395 } 396 } 397 } 398 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 399 ierr = PetscFree(jj);CHKERRQ(ierr); 400 401 /* store nonzero values */ 402 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa); 403 count = 0; 404 for ( i=0; i<a->mbs; i++ ) { 405 for ( j=0; j<bs; j++ ) { 406 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 407 for ( l=0; l<bs; l++ ) { 408 aa[count++] = a->a[bs2*k + l*bs + j]; 409 } 410 } 411 } 412 } 413 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 414 ierr = PetscFree(aa);CHKERRQ(ierr); 415 416 ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 417 if (file) { 418 fprintf(file,"-matload_block_size %d\n",a->bs); 419 } 420 PetscFunctionReturn(0); 421 } 422 423 #undef __FUNC__ 424 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 425 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 426 { 427 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 428 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 429 char *outputname; 430 431 PetscFunctionBegin; 432 ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr); 433 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 434 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 435 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 436 } else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 437 SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported"); 438 } else if (format == VIEWER_FORMAT_ASCII_COMMON) { 439 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 440 for ( i=0; i<a->mbs; i++ ) { 441 for ( j=0; j<bs; j++ ) { 442 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 443 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 444 for ( l=0; l<bs; l++ ) { 445 #if defined(PETSC_USE_COMPLEX) 446 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 447 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 448 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 449 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 450 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 451 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 452 } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 453 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 454 } 455 #else 456 if (a->a[bs2*k + l*bs + j] != 0.0) { 457 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 458 } 459 #endif 460 } 461 } 462 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 463 } 464 } 465 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 466 } else { 467 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 468 for ( i=0; i<a->mbs; i++ ) { 469 for ( j=0; j<bs; j++ ) { 470 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 471 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 472 for ( l=0; l<bs; l++ ) { 473 #if defined(PETSC_USE_COMPLEX) 474 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) { 475 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 476 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 477 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) { 478 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 479 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 480 } else { 481 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 482 } 483 #else 484 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 485 #endif 486 } 487 } 488 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 489 } 490 } 491 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 492 } 493 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNC__ 498 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom" 499 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa) 500 { 501 Mat A = (Mat) Aa; 502 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 503 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 504 double xl,yl,xr,yr,x_l,x_r,y_l,y_r; 505 MatScalar *aa; 506 MPI_Comm comm; 507 Viewer viewer; 508 509 PetscFunctionBegin; 510 /* 511 This is nasty. If this is called from an originally parallel matrix 512 then all processes call this, but only the first has the matrix so the 513 rest should return immediately. 514 */ 515 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 516 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 517 if (rank) PetscFunctionReturn(0); 518 519 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr); 520 521 ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 522 523 /* loop over matrix elements drawing boxes */ 524 color = DRAW_BLUE; 525 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 526 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 527 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 528 x_l = a->j[j]*bs; x_r = x_l + 1.0; 529 aa = a->a + j*bs2; 530 for ( k=0; k<bs; k++ ) { 531 for ( l=0; l<bs; l++ ) { 532 if (PetscReal(*aa++) >= 0.) continue; 533 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 534 } 535 } 536 } 537 } 538 color = DRAW_CYAN; 539 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 540 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 541 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 542 x_l = a->j[j]*bs; x_r = x_l + 1.0; 543 aa = a->a + j*bs2; 544 for ( k=0; k<bs; k++ ) { 545 for ( l=0; l<bs; l++ ) { 546 if (PetscReal(*aa++) != 0.) continue; 547 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 548 } 549 } 550 } 551 } 552 553 color = DRAW_RED; 554 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 555 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 556 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 557 x_l = a->j[j]*bs; x_r = x_l + 1.0; 558 aa = a->a + j*bs2; 559 for ( k=0; k<bs; k++ ) { 560 for ( l=0; l<bs; l++ ) { 561 if (PetscReal(*aa++) <= 0.) continue; 562 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 563 } 564 } 565 } 566 } 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNC__ 571 #define __FUNC__ "MatView_SeqBAIJ_Draw" 572 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 573 { 574 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 575 int ierr; 576 double xl,yl,xr,yr,w,h; 577 Draw draw; 578 PetscTruth isnull; 579 580 PetscFunctionBegin; 581 582 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 583 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 584 585 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 586 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 587 xr += w; yr += h; xl = -w; yl = -h; 588 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 589 ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 590 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 591 PetscFunctionReturn(0); 592 } 593 594 #undef __FUNC__ 595 #define __FUNC__ "MatView_SeqBAIJ" 596 int MatView_SeqBAIJ(Mat A,Viewer viewer) 597 { 598 int ierr; 599 PetscTruth issocket,isascii,isbinary,isdraw; 600 601 PetscFunctionBegin; 602 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 603 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 604 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 605 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 606 if (issocket) { 607 SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported"); 608 } else if (isascii){ 609 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 610 } else if (isbinary) { 611 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 612 } else if (isdraw) { 613 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 614 } else { 615 SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 616 } 617 PetscFunctionReturn(0); 618 } 619 620 621 #undef __FUNC__ 622 #define __FUNC__ "MatGetValues_SeqBAIJ" 623 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 624 { 625 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 626 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 627 int *ai = a->i, *ailen = a->ilen; 628 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 629 MatScalar *ap, *aa = a->a, zero = 0.0; 630 631 PetscFunctionBegin; 632 for ( k=0; k<m; k++ ) { /* loop over rows */ 633 row = im[k]; brow = row/bs; 634 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 635 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 636 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 637 nrow = ailen[brow]; 638 for ( l=0; l<n; l++ ) { /* loop over columns */ 639 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 640 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 641 col = in[l] ; 642 bcol = col/bs; 643 cidx = col%bs; 644 ridx = row%bs; 645 high = nrow; 646 low = 0; /* assume unsorted */ 647 while (high-low > 5) { 648 t = (low+high)/2; 649 if (rp[t] > bcol) high = t; 650 else low = t; 651 } 652 for ( i=low; i<high; i++ ) { 653 if (rp[i] > bcol) break; 654 if (rp[i] == bcol) { 655 *v++ = ap[bs2*i+bs*cidx+ridx]; 656 goto finished; 657 } 658 } 659 *v++ = zero; 660 finished:; 661 } 662 } 663 PetscFunctionReturn(0); 664 } 665 666 667 #undef __FUNC__ 668 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 669 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 670 { 671 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 672 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 673 int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 674 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 675 Scalar *value = v; 676 MatScalar *ap,*aa=a->a,*bap; 677 678 PetscFunctionBegin; 679 if (roworiented) { 680 stepval = (n-1)*bs; 681 } else { 682 stepval = (m-1)*bs; 683 } 684 for ( k=0; k<m; k++ ) { /* loop over added rows */ 685 row = im[k]; 686 if (row < 0) continue; 687 #if defined(PETSC_USE_BOPT_g) 688 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 689 #endif 690 rp = aj + ai[row]; 691 ap = aa + bs2*ai[row]; 692 rmax = imax[row]; 693 nrow = ailen[row]; 694 low = 0; 695 for ( l=0; l<n; l++ ) { /* loop over added columns */ 696 if (in[l] < 0) continue; 697 #if defined(PETSC_USE_BOPT_g) 698 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 699 #endif 700 col = in[l]; 701 if (roworiented) { 702 value = v + k*(stepval+bs)*bs + l*bs; 703 } else { 704 value = v + l*(stepval+bs)*bs + k*bs; 705 } 706 if (!sorted) low = 0; high = nrow; 707 while (high-low > 7) { 708 t = (low+high)/2; 709 if (rp[t] > col) high = t; 710 else low = t; 711 } 712 for ( i=low; i<high; i++ ) { 713 if (rp[i] > col) break; 714 if (rp[i] == col) { 715 bap = ap + bs2*i; 716 if (roworiented) { 717 if (is == ADD_VALUES) { 718 for ( ii=0; ii<bs; ii++,value+=stepval ) { 719 for (jj=ii; jj<bs2; jj+=bs ) { 720 bap[jj] += *value++; 721 } 722 } 723 } else { 724 for ( ii=0; ii<bs; ii++,value+=stepval ) { 725 for (jj=ii; jj<bs2; jj+=bs ) { 726 bap[jj] = *value++; 727 } 728 } 729 } 730 } else { 731 if (is == ADD_VALUES) { 732 for ( ii=0; ii<bs; ii++,value+=stepval ) { 733 for (jj=0; jj<bs; jj++ ) { 734 *bap++ += *value++; 735 } 736 } 737 } else { 738 for ( ii=0; ii<bs; ii++,value+=stepval ) { 739 for (jj=0; jj<bs; jj++ ) { 740 *bap++ = *value++; 741 } 742 } 743 } 744 } 745 goto noinsert2; 746 } 747 } 748 if (nonew == 1) goto noinsert2; 749 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 750 if (nrow >= rmax) { 751 /* there is no extra room in row, therefore enlarge */ 752 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 753 MatScalar *new_a; 754 755 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 756 757 /* malloc new storage space */ 758 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 759 new_a = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a); 760 new_j = (int *) (new_a + bs2*new_nz); 761 new_i = new_j + new_nz; 762 763 /* copy over old data into new slots */ 764 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 765 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 766 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 767 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 768 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 769 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 770 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 771 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 772 /* free up old matrix storage */ 773 ierr = PetscFree(a->a);CHKERRQ(ierr); 774 if (!a->singlemalloc) { 775 ierr = PetscFree(a->i);CHKERRQ(ierr); 776 ierr = PetscFree(a->j);CHKERRQ(ierr); 777 } 778 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 779 a->singlemalloc = PETSC_TRUE; 780 781 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 782 rmax = imax[row] = imax[row] + CHUNKSIZE; 783 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 784 a->maxnz += bs2*CHUNKSIZE; 785 a->reallocs++; 786 a->nz++; 787 } 788 N = nrow++ - 1; 789 /* shift up all the later entries in this row */ 790 for ( ii=N; ii>=i; ii-- ) { 791 rp[ii+1] = rp[ii]; 792 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 793 } 794 if (N >= i) { 795 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 796 } 797 rp[i] = col; 798 bap = ap + bs2*i; 799 if (roworiented) { 800 for ( ii=0; ii<bs; ii++,value+=stepval) { 801 for (jj=ii; jj<bs2; jj+=bs ) { 802 bap[jj] = *value++; 803 } 804 } 805 } else { 806 for ( ii=0; ii<bs; ii++,value+=stepval ) { 807 for (jj=0; jj<bs; jj++ ) { 808 *bap++ = *value++; 809 } 810 } 811 } 812 noinsert2:; 813 low = i; 814 } 815 ailen[row] = nrow; 816 } 817 PetscFunctionReturn(0); 818 } 819 820 821 #undef __FUNC__ 822 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 823 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 824 { 825 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 826 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 827 int m = a->m,*ip, N, *ailen = a->ilen; 828 int mbs = a->mbs, bs2 = a->bs2,rmax = 0,ierr; 829 MatScalar *aa = a->a, *ap; 830 831 PetscFunctionBegin; 832 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 833 834 if (m) rmax = ailen[0]; 835 for ( i=1; i<mbs; i++ ) { 836 /* move each row back by the amount of empty slots (fshift) before it*/ 837 fshift += imax[i-1] - ailen[i-1]; 838 rmax = PetscMax(rmax,ailen[i]); 839 if (fshift) { 840 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 841 N = ailen[i]; 842 for ( j=0; j<N; j++ ) { 843 ip[j-fshift] = ip[j]; 844 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 845 } 846 } 847 ai[i] = ai[i-1] + ailen[i-1]; 848 } 849 if (mbs) { 850 fshift += imax[mbs-1] - ailen[mbs-1]; 851 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 852 } 853 /* reset ilen and imax for each row */ 854 for ( i=0; i<mbs; i++ ) { 855 ailen[i] = imax[i] = ai[i+1] - ai[i]; 856 } 857 a->nz = ai[mbs]; 858 859 /* diagonals may have moved, so kill the diagonal pointers */ 860 if (fshift && a->diag) { 861 ierr = PetscFree(a->diag);CHKERRQ(ierr); 862 PLogObjectMemory(A,-(m+1)*sizeof(int)); 863 a->diag = 0; 864 } 865 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 866 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 867 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 868 a->reallocs); 869 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 870 a->reallocs = 0; 871 A->info.nz_unneeded = (double)fshift*bs2; 872 873 PetscFunctionReturn(0); 874 } 875 876 877 878 /* 879 This function returns an array of flags which indicate the locations of contiguous 880 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 881 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 882 Assume: sizes should be long enough to hold all the values. 883 */ 884 #undef __FUNC__ 885 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks" 886 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 887 { 888 int i,j,k,row; 889 PetscTruth flg; 890 891 PetscFunctionBegin; 892 for ( i=0,j=0; i<n; j++ ) { 893 row = idx[i]; 894 if (row%bs!=0) { /* Not the begining of a block */ 895 sizes[j] = 1; 896 i++; 897 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 898 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 899 i++; 900 } else { /* Begining of the block, so check if the complete block exists */ 901 flg = PETSC_TRUE; 902 for ( k=1; k<bs; k++ ) { 903 if (row+k != idx[i+k]) { /* break in the block */ 904 flg = PETSC_FALSE; 905 break; 906 } 907 } 908 if (flg == PETSC_TRUE) { /* No break in the bs */ 909 sizes[j] = bs; 910 i+= bs; 911 } else { 912 sizes[j] = 1; 913 i++; 914 } 915 } 916 } 917 *bs_max = j; 918 PetscFunctionReturn(0); 919 } 920 921 #undef __FUNC__ 922 #define __FUNC__ "MatZeroRows_SeqBAIJ" 923 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 924 { 925 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 926 int ierr,i,j,k,count,is_n,*is_idx,*rows; 927 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 928 Scalar zero = 0.0; 929 MatScalar *aa; 930 931 PetscFunctionBegin; 932 /* Make a copy of the IS and sort it */ 933 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 934 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 935 936 /* allocate memory for rows,sizes */ 937 rows = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows); 938 sizes = rows + is_n; 939 940 /* initialize copy IS valurs to rows, and sort them */ 941 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 942 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 943 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 944 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 945 946 for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) { 947 row = rows[j]; 948 if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row); 949 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 950 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 951 if (sizes[i] == bs) { 952 if (diag) { 953 if (baij->ilen[row/bs] > 0) { 954 baij->ilen[row/bs] = 1; 955 baij->j[baij->i[row/bs]] = row/bs; 956 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 957 } 958 /* Now insert all the diagoanl values for this bs */ 959 for ( k=0; k<bs; k++ ) { 960 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 961 } 962 } else { /* (!diag) */ 963 baij->ilen[row/bs] = 0; 964 } /* end (!diag) */ 965 } else { /* (sizes[i] != bs) */ 966 #if defined (PETSC_USE_DEBUG) 967 if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1"); 968 #endif 969 for ( k=0; k<count; k++ ) { 970 aa[0] = zero; 971 aa+=bs; 972 } 973 if (diag) { 974 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 975 } 976 } 977 } 978 979 ierr = PetscFree(rows);CHKERRQ(ierr); 980 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNC__ 985 #define __FUNC__ "MatSetValues_SeqBAIJ" 986 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 987 { 988 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 989 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 990 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 991 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 992 int ridx,cidx,bs2=a->bs2,ierr; 993 MatScalar *ap,value,*aa=a->a,*bap; 994 995 PetscFunctionBegin; 996 for ( k=0; k<m; k++ ) { /* loop over added rows */ 997 row = im[k]; brow = row/bs; 998 if (row < 0) continue; 999 #if defined(PETSC_USE_BOPT_g) 1000 if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m); 1001 #endif 1002 rp = aj + ai[brow]; 1003 ap = aa + bs2*ai[brow]; 1004 rmax = imax[brow]; 1005 nrow = ailen[brow]; 1006 low = 0; 1007 for ( l=0; l<n; l++ ) { /* loop over added columns */ 1008 if (in[l] < 0) continue; 1009 #if defined(PETSC_USE_BOPT_g) 1010 if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n); 1011 #endif 1012 col = in[l]; bcol = col/bs; 1013 ridx = row % bs; cidx = col % bs; 1014 if (roworiented) { 1015 value = v[l + k*n]; 1016 } else { 1017 value = v[k + l*m]; 1018 } 1019 if (!sorted) low = 0; high = nrow; 1020 while (high-low > 7) { 1021 t = (low+high)/2; 1022 if (rp[t] > bcol) high = t; 1023 else low = t; 1024 } 1025 for ( i=low; i<high; i++ ) { 1026 if (rp[i] > bcol) break; 1027 if (rp[i] == bcol) { 1028 bap = ap + bs2*i + bs*cidx + ridx; 1029 if (is == ADD_VALUES) *bap += value; 1030 else *bap = value; 1031 goto noinsert1; 1032 } 1033 } 1034 if (nonew == 1) goto noinsert1; 1035 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1036 if (nrow >= rmax) { 1037 /* there is no extra room in row, therefore enlarge */ 1038 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1039 MatScalar *new_a; 1040 1041 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1042 1043 /* Malloc new storage space */ 1044 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1045 new_a = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a); 1046 new_j = (int *) (new_a + bs2*new_nz); 1047 new_i = new_j + new_nz; 1048 1049 /* copy over old data into new slots */ 1050 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 1051 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1052 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1053 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1054 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1055 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1056 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1057 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1058 /* free up old matrix storage */ 1059 ierr = PetscFree(a->a);CHKERRQ(ierr); 1060 if (!a->singlemalloc) { 1061 ierr = PetscFree(a->i);CHKERRQ(ierr); 1062 ierr = PetscFree(a->j);CHKERRQ(ierr); 1063 } 1064 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1065 a->singlemalloc = PETSC_TRUE; 1066 1067 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1068 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1069 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1070 a->maxnz += bs2*CHUNKSIZE; 1071 a->reallocs++; 1072 a->nz++; 1073 } 1074 N = nrow++ - 1; 1075 /* shift up all the later entries in this row */ 1076 for ( ii=N; ii>=i; ii-- ) { 1077 rp[ii+1] = rp[ii]; 1078 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1079 } 1080 if (N>=i) { 1081 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1082 } 1083 rp[i] = bcol; 1084 ap[bs2*i + bs*cidx + ridx] = value; 1085 noinsert1:; 1086 low = i; 1087 } 1088 ailen[brow] = nrow; 1089 } 1090 PetscFunctionReturn(0); 1091 } 1092 1093 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1094 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1095 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1096 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1097 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1098 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec); 1099 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1100 extern int MatScale_SeqBAIJ(Scalar*,Mat); 1101 extern int MatNorm_SeqBAIJ(Mat,NormType,double *); 1102 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*); 1103 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1104 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1105 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1106 extern int MatZeroEntries_SeqBAIJ(Mat); 1107 1108 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1109 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1110 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1111 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1112 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1113 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1114 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 1115 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1116 extern int MatSolveTrans_SeqBAIJ_7(Mat,Vec,Vec); 1117 extern int MatSolveTrans_SeqBAIJ_6(Mat,Vec,Vec); 1118 extern int MatSolveTrans_SeqBAIJ_5(Mat,Vec,Vec); 1119 extern int MatSolveTrans_SeqBAIJ_4(Mat,Vec,Vec); 1120 extern int MatSolveTrans_SeqBAIJ_3(Mat,Vec,Vec); 1121 extern int MatSolveTrans_SeqBAIJ_2(Mat,Vec,Vec); 1122 extern int MatSolveTrans_SeqBAIJ_1(Mat,Vec,Vec); 1123 1124 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1125 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1126 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1127 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1128 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1129 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1130 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*); 1131 1132 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1133 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1134 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1135 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1136 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1137 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec); 1138 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1139 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1140 1141 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1142 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1143 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1144 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1145 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1146 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 1147 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1148 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1149 1150 #undef __FUNC__ 1151 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1152 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1153 { 1154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1155 Mat outA; 1156 int ierr; 1157 PetscTruth row_identity, col_identity; 1158 1159 PetscFunctionBegin; 1160 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU"); 1161 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1162 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1163 if (!row_identity || !col_identity) { 1164 SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU"); 1165 } 1166 1167 outA = inA; 1168 inA->factor = FACTOR_LU; 1169 1170 if (!a->diag) { 1171 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1172 } 1173 /* 1174 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1175 for ILU(0) factorization with natural ordering 1176 */ 1177 switch (a->bs) { 1178 case 1: 1179 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_2_NaturalOrdering; 1180 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1181 case 2: 1182 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1183 inA->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1184 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_2_NaturalOrdering; 1185 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1186 break; 1187 case 3: 1188 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1189 inA->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1190 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_3_NaturalOrdering; 1191 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1192 break; 1193 case 4: 1194 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1195 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1196 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_4_NaturalOrdering; 1197 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1198 break; 1199 case 5: 1200 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1201 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1202 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_5_NaturalOrdering; 1203 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1204 break; 1205 case 6: 1206 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1207 inA->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1208 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_6_NaturalOrdering; 1209 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1210 break; 1211 case 7: 1212 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1213 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_7_NaturalOrdering; 1214 inA->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1215 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1216 break; 1217 default: 1218 a->row = row; 1219 a->col = col; 1220 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1221 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1222 1223 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1224 ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr); 1225 PLogObjectParent(inA,a->icol); 1226 1227 if (!a->solve_work) { 1228 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1229 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1230 } 1231 } 1232 1233 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1234 1235 PetscFunctionReturn(0); 1236 } 1237 #undef __FUNC__ 1238 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1239 int MatPrintHelp_SeqBAIJ(Mat A) 1240 { 1241 static PetscTruth called = PETSC_FALSE; 1242 MPI_Comm comm = A->comm; 1243 int ierr; 1244 1245 PetscFunctionBegin; 1246 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1247 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1248 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 EXTERN_C_BEGIN 1253 #undef __FUNC__ 1254 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1255 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1256 { 1257 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1258 int i,nz,n; 1259 1260 PetscFunctionBegin; 1261 nz = baij->maxnz; 1262 n = baij->n; 1263 for (i=0; i<nz; i++) { 1264 baij->j[i] = indices[i]; 1265 } 1266 baij->nz = nz; 1267 for ( i=0; i<n; i++ ) { 1268 baij->ilen[i] = baij->imax[i]; 1269 } 1270 1271 PetscFunctionReturn(0); 1272 } 1273 EXTERN_C_END 1274 1275 #undef __FUNC__ 1276 #define __FUNC__ "MatSeqBAIJSetColumnIndices" 1277 /*@ 1278 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1279 in the matrix. 1280 1281 Input Parameters: 1282 + mat - the SeqBAIJ matrix 1283 - indices - the column indices 1284 1285 Level: advanced 1286 1287 Notes: 1288 This can be called if you have precomputed the nonzero structure of the 1289 matrix and want to provide it to the matrix object to improve the performance 1290 of the MatSetValues() operation. 1291 1292 You MUST have set the correct numbers of nonzeros per row in the call to 1293 MatCreateSeqBAIJ(). 1294 1295 MUST be called before any calls to MatSetValues(); 1296 1297 @*/ 1298 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1299 { 1300 int ierr,(*f)(Mat,int *); 1301 1302 PetscFunctionBegin; 1303 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1304 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr); 1305 if (f) { 1306 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1307 } else { 1308 SETERRQ(1,1,"Wrong type of matrix to set column indices"); 1309 } 1310 PetscFunctionReturn(0); 1311 } 1312 1313 /* -------------------------------------------------------------------*/ 1314 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1315 MatGetRow_SeqBAIJ, 1316 MatRestoreRow_SeqBAIJ, 1317 MatMult_SeqBAIJ_N, 1318 MatMultAdd_SeqBAIJ_N, 1319 MatMultTrans_SeqBAIJ, 1320 MatMultTransAdd_SeqBAIJ, 1321 MatSolve_SeqBAIJ_N, 1322 0, 1323 0, 1324 0, 1325 MatLUFactor_SeqBAIJ, 1326 0, 1327 0, 1328 MatTranspose_SeqBAIJ, 1329 MatGetInfo_SeqBAIJ, 1330 MatEqual_SeqBAIJ, 1331 MatGetDiagonal_SeqBAIJ, 1332 MatDiagonalScale_SeqBAIJ, 1333 MatNorm_SeqBAIJ, 1334 0, 1335 MatAssemblyEnd_SeqBAIJ, 1336 0, 1337 MatSetOption_SeqBAIJ, 1338 MatZeroEntries_SeqBAIJ, 1339 MatZeroRows_SeqBAIJ, 1340 MatLUFactorSymbolic_SeqBAIJ, 1341 MatLUFactorNumeric_SeqBAIJ_N, 1342 0, 1343 0, 1344 MatGetSize_SeqBAIJ, 1345 MatGetSize_SeqBAIJ, 1346 MatGetOwnershipRange_SeqBAIJ, 1347 MatILUFactorSymbolic_SeqBAIJ, 1348 0, 1349 0, 1350 0, 1351 MatDuplicate_SeqBAIJ, 1352 0, 1353 0, 1354 MatILUFactor_SeqBAIJ, 1355 0, 1356 0, 1357 MatGetSubMatrices_SeqBAIJ, 1358 MatIncreaseOverlap_SeqBAIJ, 1359 MatGetValues_SeqBAIJ, 1360 0, 1361 MatPrintHelp_SeqBAIJ, 1362 MatScale_SeqBAIJ, 1363 0, 1364 0, 1365 0, 1366 MatGetBlockSize_SeqBAIJ, 1367 MatGetRowIJ_SeqBAIJ, 1368 MatRestoreRowIJ_SeqBAIJ, 1369 0, 1370 0, 1371 0, 1372 0, 1373 0, 1374 0, 1375 MatSetValuesBlocked_SeqBAIJ, 1376 MatGetSubMatrix_SeqBAIJ, 1377 0, 1378 0, 1379 MatGetMaps_Petsc}; 1380 1381 EXTERN_C_BEGIN 1382 #undef __FUNC__ 1383 #define __FUNC__ "MatStoreValues_SeqBAIJ" 1384 int MatStoreValues_SeqBAIJ(Mat mat) 1385 { 1386 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1387 int nz = aij->i[aij->m]*aij->bs*aij->bs2; 1388 int ierr; 1389 1390 PetscFunctionBegin; 1391 if (aij->nonew != 1) { 1392 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1393 } 1394 1395 /* allocate space for values if not already there */ 1396 if (!aij->saved_values) { 1397 aij->saved_values = (Scalar *) PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values); 1398 } 1399 1400 /* copy values over */ 1401 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1402 PetscFunctionReturn(0); 1403 } 1404 EXTERN_C_END 1405 1406 EXTERN_C_BEGIN 1407 #undef __FUNC__ 1408 #define __FUNC__ "MatRetrieveValues_SeqBAIJ" 1409 int MatRetrieveValues_SeqBAIJ(Mat mat) 1410 { 1411 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1412 int nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr; 1413 1414 PetscFunctionBegin; 1415 if (aij->nonew != 1) { 1416 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1417 } 1418 if (!aij->saved_values) { 1419 SETERRQ(1,1,"Must call MatStoreValues(A);first"); 1420 } 1421 1422 /* copy values over */ 1423 ierr = PetscMemcpy(aij->a, aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1424 PetscFunctionReturn(0); 1425 } 1426 EXTERN_C_END 1427 1428 #undef __FUNC__ 1429 #define __FUNC__ "MatCreateSeqBAIJ" 1430 /*@C 1431 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1432 compressed row) format. For good matrix assembly performance the 1433 user should preallocate the matrix storage by setting the parameter nz 1434 (or the array nnz). By setting these parameters accurately, performance 1435 during matrix assembly can be increased by more than a factor of 50. 1436 1437 Collective on MPI_Comm 1438 1439 Input Parameters: 1440 + comm - MPI communicator, set to PETSC_COMM_SELF 1441 . bs - size of block 1442 . m - number of rows 1443 . n - number of columns 1444 . nz - number of block nonzeros per block row (same for all rows) 1445 - nnz - array containing the number of block nonzeros in the various block rows 1446 (possibly different for each block row) or PETSC_NULL 1447 1448 Output Parameter: 1449 . A - the matrix 1450 1451 Options Database Keys: 1452 . -mat_no_unroll - uses code that does not unroll the loops in the 1453 block calculations (much slower) 1454 . -mat_block_size - size of the blocks to use 1455 1456 Level: intermediate 1457 1458 Notes: 1459 The block AIJ format is fully compatible with standard Fortran 77 1460 storage. That is, the stored row and column indices can begin at 1461 either one (as in Fortran) or zero. See the users' manual for details. 1462 1463 Specify the preallocated storage with either nz or nnz (not both). 1464 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1465 allocation. For additional details, see the users manual chapter on 1466 matrices. 1467 1468 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1469 @*/ 1470 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1471 { 1472 Mat B; 1473 Mat_SeqBAIJ *b; 1474 int i,len,ierr,mbs,nbs,bs2,size; 1475 PetscTruth flg; 1476 1477 PetscFunctionBegin; 1478 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1479 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1"); 1480 1481 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1482 mbs = m/bs; 1483 nbs = n/bs; 1484 bs2 = bs*bs; 1485 1486 if (mbs*bs!=m || nbs*bs!=n) { 1487 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize"); 1488 } 1489 1490 if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz); 1491 if (nnz) { 1492 for (i=0; i<mbs; i++) { 1493 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1494 } 1495 } 1496 1497 *A = 0; 1498 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView); 1499 PLogObjectCreate(B); 1500 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b); 1501 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1502 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1503 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1504 if (!flg) { 1505 switch (bs) { 1506 case 1: 1507 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1508 B->ops->solve = MatSolve_SeqBAIJ_1; 1509 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_1; 1510 B->ops->mult = MatMult_SeqBAIJ_1; 1511 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1512 break; 1513 case 2: 1514 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1515 B->ops->solve = MatSolve_SeqBAIJ_2; 1516 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_2; 1517 B->ops->mult = MatMult_SeqBAIJ_2; 1518 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1519 break; 1520 case 3: 1521 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1522 B->ops->solve = MatSolve_SeqBAIJ_3; 1523 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_3; 1524 B->ops->mult = MatMult_SeqBAIJ_3; 1525 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1526 break; 1527 case 4: 1528 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1529 B->ops->solve = MatSolve_SeqBAIJ_4; 1530 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_4; 1531 B->ops->mult = MatMult_SeqBAIJ_4; 1532 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1533 break; 1534 case 5: 1535 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1536 B->ops->solve = MatSolve_SeqBAIJ_5; 1537 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_5; 1538 B->ops->mult = MatMult_SeqBAIJ_5; 1539 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1540 break; 1541 case 6: 1542 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1543 B->ops->solve = MatSolve_SeqBAIJ_6; 1544 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_6; 1545 B->ops->mult = MatMult_SeqBAIJ_6; 1546 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1547 break; 1548 case 7: 1549 B->ops->mult = MatMult_SeqBAIJ_7; 1550 B->ops->solve = MatSolve_SeqBAIJ_7; 1551 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_7; 1552 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1553 break; 1554 } 1555 } 1556 B->ops->destroy = MatDestroy_SeqBAIJ; 1557 B->ops->view = MatView_SeqBAIJ; 1558 B->factor = 0; 1559 B->lupivotthreshold = 1.0; 1560 B->mapping = 0; 1561 b->row = 0; 1562 b->col = 0; 1563 b->icol = 0; 1564 b->reallocs = 0; 1565 b->saved_values = 0; 1566 1567 b->m = m; B->m = m; B->M = m; 1568 b->n = n; B->n = n; B->N = n; 1569 1570 ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr); 1571 ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr); 1572 1573 b->mbs = mbs; 1574 b->nbs = nbs; 1575 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(b->imax); 1576 if (!nnz) { 1577 if (nz == PETSC_DEFAULT) nz = 5; 1578 else if (nz <= 0) nz = 1; 1579 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1580 nz = nz*mbs; 1581 } else { 1582 nz = 0; 1583 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1584 } 1585 1586 /* allocate the matrix space */ 1587 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int); 1588 b->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a); 1589 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1590 b->j = (int *) (b->a + nz*bs2); 1591 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1592 b->i = b->j + nz; 1593 b->singlemalloc = PETSC_TRUE; 1594 1595 b->i[0] = 0; 1596 for (i=1; i<mbs+1; i++) { 1597 b->i[i] = b->i[i-1] + b->imax[i-1]; 1598 } 1599 1600 /* b->ilen will count nonzeros in each block row so far. */ 1601 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1602 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1603 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1604 1605 b->bs = bs; 1606 b->bs2 = bs2; 1607 b->mbs = mbs; 1608 b->nz = 0; 1609 b->maxnz = nz*bs2; 1610 b->sorted = PETSC_FALSE; 1611 b->roworiented = PETSC_TRUE; 1612 b->nonew = 0; 1613 b->diag = 0; 1614 b->solve_work = 0; 1615 b->mult_work = 0; 1616 b->spptr = 0; 1617 B->info.nz_unneeded = (double)b->maxnz; 1618 1619 *A = B; 1620 ierr = OptionsHasName(PETSC_NULL,"-help", &flg);CHKERRQ(ierr); 1621 if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); } 1622 1623 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1624 "MatStoreValues_SeqBAIJ", 1625 (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1626 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1627 "MatRetrieveValues_SeqBAIJ", 1628 (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1629 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1630 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1631 (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 #undef __FUNC__ 1636 #define __FUNC__ "MatDuplicate_SeqBAIJ" 1637 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1638 { 1639 Mat C; 1640 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1641 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1642 1643 PetscFunctionBegin; 1644 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix"); 1645 1646 *B = 0; 1647 PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView); 1648 PLogObjectCreate(C); 1649 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c); 1650 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1651 C->ops->destroy = MatDestroy_SeqBAIJ; 1652 C->ops->view = MatView_SeqBAIJ; 1653 C->factor = A->factor; 1654 c->row = 0; 1655 c->col = 0; 1656 c->icol = 0; 1657 c->saved_values = 0; 1658 C->assembled = PETSC_TRUE; 1659 1660 c->m = C->m = a->m; 1661 c->n = C->n = a->n; 1662 C->M = a->m; 1663 C->N = a->n; 1664 1665 c->bs = a->bs; 1666 c->bs2 = a->bs2; 1667 c->mbs = a->mbs; 1668 c->nbs = a->nbs; 1669 1670 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax); 1671 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen); 1672 for ( i=0; i<mbs; i++ ) { 1673 c->imax[i] = a->imax[i]; 1674 c->ilen[i] = a->ilen[i]; 1675 } 1676 1677 /* allocate the matrix space */ 1678 c->singlemalloc = PETSC_TRUE; 1679 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1680 c->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(c->a); 1681 c->j = (int *) (c->a + nz*bs2); 1682 c->i = c->j + nz; 1683 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1684 if (mbs > 0) { 1685 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1686 if (cpvalues == MAT_COPY_VALUES) { 1687 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1688 } else { 1689 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1690 } 1691 } 1692 1693 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1694 c->sorted = a->sorted; 1695 c->roworiented = a->roworiented; 1696 c->nonew = a->nonew; 1697 1698 if (a->diag) { 1699 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(c->diag); 1700 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1701 for ( i=0; i<mbs; i++ ) { 1702 c->diag[i] = a->diag[i]; 1703 } 1704 } else c->diag = 0; 1705 c->nz = a->nz; 1706 c->maxnz = a->maxnz; 1707 c->solve_work = 0; 1708 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1709 c->mult_work = 0; 1710 *B = C; 1711 ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1712 PetscFunctionReturn(0); 1713 } 1714 1715 #undef __FUNC__ 1716 #define __FUNC__ "MatLoad_SeqBAIJ" 1717 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1718 { 1719 Mat_SeqBAIJ *a; 1720 Mat B; 1721 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1722 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1723 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1724 int *masked, nmask,tmp,bs2,ishift; 1725 Scalar *aa; 1726 MPI_Comm comm = ((PetscObject) viewer)->comm; 1727 1728 PetscFunctionBegin; 1729 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1730 bs2 = bs*bs; 1731 1732 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1733 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor"); 1734 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1735 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1736 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object"); 1737 M = header[1]; N = header[2]; nz = header[3]; 1738 1739 if (header[3] < 0) { 1740 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ"); 1741 } 1742 1743 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 1744 1745 /* 1746 This code adds extra rows to make sure the number of rows is 1747 divisible by the blocksize 1748 */ 1749 mbs = M/bs; 1750 extra_rows = bs - M + bs*(mbs); 1751 if (extra_rows == bs) extra_rows = 0; 1752 else mbs++; 1753 if (extra_rows) { 1754 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1755 } 1756 1757 /* read in row lengths */ 1758 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1759 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1760 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1761 1762 /* read in column indices */ 1763 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) );CHKPTRQ(jj); 1764 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1765 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1766 1767 /* loop over row lengths determining block row lengths */ 1768 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1769 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1770 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) );CHKPTRQ(mask); 1771 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1772 masked = mask + mbs; 1773 rowcount = 0; nzcount = 0; 1774 for ( i=0; i<mbs; i++ ) { 1775 nmask = 0; 1776 for ( j=0; j<bs; j++ ) { 1777 kmax = rowlengths[rowcount]; 1778 for ( k=0; k<kmax; k++ ) { 1779 tmp = jj[nzcount++]/bs; 1780 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1781 } 1782 rowcount++; 1783 } 1784 browlengths[i] += nmask; 1785 /* zero out the mask elements we set */ 1786 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1787 } 1788 1789 /* create our matrix */ 1790 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1791 B = *A; 1792 a = (Mat_SeqBAIJ *) B->data; 1793 1794 /* set matrix "i" values */ 1795 a->i[0] = 0; 1796 for ( i=1; i<= mbs; i++ ) { 1797 a->i[i] = a->i[i-1] + browlengths[i-1]; 1798 a->ilen[i-1] = browlengths[i-1]; 1799 } 1800 a->nz = 0; 1801 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1802 1803 /* read in nonzero values */ 1804 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1805 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1806 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1807 1808 /* set "a" and "j" values into matrix */ 1809 nzcount = 0; jcount = 0; 1810 for ( i=0; i<mbs; i++ ) { 1811 nzcountb = nzcount; 1812 nmask = 0; 1813 for ( j=0; j<bs; j++ ) { 1814 kmax = rowlengths[i*bs+j]; 1815 for ( k=0; k<kmax; k++ ) { 1816 tmp = jj[nzcount++]/bs; 1817 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1818 } 1819 rowcount++; 1820 } 1821 /* sort the masked values */ 1822 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1823 1824 /* set "j" values into matrix */ 1825 maskcount = 1; 1826 for ( j=0; j<nmask; j++ ) { 1827 a->j[jcount++] = masked[j]; 1828 mask[masked[j]] = maskcount++; 1829 } 1830 /* set "a" values into matrix */ 1831 ishift = bs2*a->i[i]; 1832 for ( j=0; j<bs; j++ ) { 1833 kmax = rowlengths[i*bs+j]; 1834 for ( k=0; k<kmax; k++ ) { 1835 tmp = jj[nzcountb]/bs ; 1836 block = mask[tmp] - 1; 1837 point = jj[nzcountb] - bs*tmp; 1838 idx = ishift + bs2*block + j + bs*point; 1839 a->a[idx] = aa[nzcountb++]; 1840 } 1841 } 1842 /* zero out the mask elements we set */ 1843 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1844 } 1845 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix"); 1846 1847 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1848 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1849 ierr = PetscFree(aa);CHKERRQ(ierr); 1850 ierr = PetscFree(jj);CHKERRQ(ierr); 1851 ierr = PetscFree(mask);CHKERRQ(ierr); 1852 1853 B->assembled = PETSC_TRUE; 1854 1855 ierr = MatView_Private(B);CHKERRQ(ierr); 1856 PetscFunctionReturn(0); 1857 } 1858 1859 1860 1861