1 /*$Id: baij.c,v 1.192 1999/11/20 18:36:47 bsmith Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "sys.h" 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNC__ 18 #define __FUNC__ "MatMissingDiagonal_SeqBAIJ" 19 int MatMissingDiagonal_SeqBAIJ(Mat A) 20 { 21 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 22 int *diag, *jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for ( i=0; i<a->mbs; i++ ) { 28 if (jj[diag[i]] != i) { 29 SETERRQ1(1,1,"Matrix is missing diagonal number %d",i); 30 } 31 } 32 PetscFunctionReturn(0); 33 } 34 35 #undef __FUNC__ 36 #define __FUNC__ "MatMarkDiagonal_SeqBAIJ" 37 int MatMarkDiagonal_SeqBAIJ(Mat A) 38 { 39 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 40 int i,j, *diag, m = a->mbs; 41 42 PetscFunctionBegin; 43 if (a->diag) PetscFunctionReturn(0); 44 45 diag = (int *) PetscMalloc( (m+1)*sizeof(int));CHKPTRQ(diag); 46 PLogObjectMemory(A,(m+1)*sizeof(int)); 47 for ( i=0; i<m; i++ ) { 48 diag[i] = a->i[i+1]; 49 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 50 if (a->j[j] == i) { 51 diag[i] = j; 52 break; 53 } 54 } 55 } 56 a->diag = diag; 57 PetscFunctionReturn(0); 58 } 59 60 61 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 62 63 #undef __FUNC__ 64 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 65 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 66 PetscTruth *done) 67 { 68 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 69 int ierr, n = a->mbs,i; 70 71 PetscFunctionBegin; 72 *nn = n; 73 if (!ia) PetscFunctionReturn(0); 74 if (symmetric) { 75 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 76 } else if (oshift == 1) { 77 /* temporarily add 1 to i and j indices */ 78 int nz = a->i[n] + 1; 79 for ( i=0; i<nz; i++ ) a->j[i]++; 80 for ( i=0; i<n+1; i++ ) a->i[i]++; 81 *ia = a->i; *ja = a->j; 82 } else { 83 *ia = a->i; *ja = a->j; 84 } 85 86 PetscFunctionReturn(0); 87 } 88 89 #undef __FUNC__ 90 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 91 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 92 PetscTruth *done) 93 { 94 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 95 int i,n = a->mbs,ierr; 96 97 PetscFunctionBegin; 98 if (!ia) PetscFunctionReturn(0); 99 if (symmetric) { 100 ierr = PetscFree(*ia);CHKERRQ(ierr); 101 ierr = PetscFree(*ja);CHKERRQ(ierr); 102 } else if (oshift == 1) { 103 int nz = a->i[n]; 104 for ( i=0; i<nz; i++ ) a->j[i]--; 105 for ( i=0; i<n+1; i++ ) a->i[i]--; 106 } 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNC__ 111 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 112 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 113 { 114 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 115 116 PetscFunctionBegin; 117 *bs = baij->bs; 118 PetscFunctionReturn(0); 119 } 120 121 122 #undef __FUNC__ 123 #define __FUNC__ "MatDestroy_SeqBAIJ" 124 int MatDestroy_SeqBAIJ(Mat A) 125 { 126 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 127 int ierr; 128 129 PetscFunctionBegin; 130 if (A->mapping) { 131 ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr); 132 } 133 if (A->bmapping) { 134 ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr); 135 } 136 if (A->rmap) { 137 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 138 } 139 if (A->cmap) { 140 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 141 } 142 #if defined(PETSC_USE_LOG) 143 PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 144 #endif 145 ierr = PetscFree(a->a);CHKERRQ(ierr); 146 if (!a->singlemalloc) { 147 ierr = PetscFree(a->i);CHKERRQ(ierr); 148 ierr = PetscFree(a->j);CHKERRQ(ierr); 149 } 150 if (a->row) { 151 ierr = ISDestroy(a->row);CHKERRQ(ierr); 152 } 153 if (a->col) { 154 ierr = ISDestroy(a->col);CHKERRQ(ierr); 155 } 156 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 157 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 158 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 159 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 160 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 161 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 162 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 163 ierr = PetscFree(a);CHKERRQ(ierr); 164 PLogObjectDestroy(A); 165 PetscHeaderDestroy(A); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNC__ 170 #define __FUNC__ "MatSetOption_SeqBAIJ" 171 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 172 { 173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 174 175 PetscFunctionBegin; 176 if (op == MAT_ROW_ORIENTED) a->roworiented = PETSC_TRUE; 177 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = PETSC_FALSE; 178 else if (op == MAT_COLUMNS_SORTED) a->sorted = PETSC_TRUE; 179 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = PETSC_FALSE; 180 else if (op == MAT_KEEP_ZEROED_ROWS) a->keepzeroedrows = PETSC_TRUE; 181 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 182 else if (op == MAT_NEW_NONZERO_LOCATION_ERR) a->nonew = -1; 183 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR) a->nonew = -2; 184 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 185 else if (op == MAT_ROWS_SORTED || 186 op == MAT_ROWS_UNSORTED || 187 op == MAT_SYMMETRIC || 188 op == MAT_STRUCTURALLY_SYMMETRIC || 189 op == MAT_YES_NEW_DIAGONALS || 190 op == MAT_IGNORE_OFF_PROC_ENTRIES || 191 op == MAT_USE_HASH_TABLE) { 192 PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 193 } else if (op == MAT_NO_NEW_DIAGONALS) { 194 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 195 } else { 196 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 197 } 198 PetscFunctionReturn(0); 199 } 200 201 202 #undef __FUNC__ 203 #define __FUNC__ "MatGetSize_SeqBAIJ" 204 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 205 { 206 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 207 208 PetscFunctionBegin; 209 if (m) *m = a->m; 210 if (n) *n = a->n; 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNC__ 215 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 216 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 219 220 PetscFunctionBegin; 221 *m = 0; *n = a->m; 222 PetscFunctionReturn(0); 223 } 224 225 #undef __FUNC__ 226 #define __FUNC__ "MatGetRow_SeqBAIJ" 227 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 228 { 229 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 230 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 231 MatScalar *aa,*aa_i; 232 Scalar *v_i; 233 234 PetscFunctionBegin; 235 bs = a->bs; 236 ai = a->i; 237 aj = a->j; 238 aa = a->a; 239 bs2 = a->bs2; 240 241 if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range"); 242 243 bn = row/bs; /* Block number */ 244 bp = row % bs; /* Block Position */ 245 M = ai[bn+1] - ai[bn]; 246 *nz = bs*M; 247 248 if (v) { 249 *v = 0; 250 if (*nz) { 251 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) );CHKPTRQ(*v); 252 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 253 v_i = *v + i*bs; 254 aa_i = aa + bs2*(ai[bn] + i); 255 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 256 } 257 } 258 } 259 260 if (idx) { 261 *idx = 0; 262 if (*nz) { 263 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) );CHKPTRQ(*idx); 264 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 265 idx_i = *idx + i*bs; 266 itmp = bs*aj[ai[bn] + i]; 267 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 268 } 269 } 270 } 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNC__ 275 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 276 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 277 { 278 int ierr; 279 280 PetscFunctionBegin; 281 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 282 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNC__ 287 #define __FUNC__ "MatTranspose_SeqBAIJ" 288 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 289 { 290 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 291 Mat C; 292 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 293 int *rows,*cols,bs2=a->bs2; 294 MatScalar *array = a->a; 295 296 PetscFunctionBegin; 297 if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place"); 298 col = (int *) PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col); 299 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 300 301 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 302 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 303 ierr = PetscFree(col);CHKERRQ(ierr); 304 rows = (int *) PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows); 305 cols = rows + bs; 306 for ( i=0; i<mbs; i++ ) { 307 cols[0] = i*bs; 308 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 309 len = ai[i+1] - ai[i]; 310 for ( j=0; j<len; j++ ) { 311 rows[0] = (*aj++)*bs; 312 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 313 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 314 array += bs2; 315 } 316 } 317 ierr = PetscFree(rows);CHKERRQ(ierr); 318 319 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 322 if (B) { 323 *B = C; 324 } else { 325 PetscOps *Abops; 326 MatOps Aops; 327 328 /* This isn't really an in-place transpose */ 329 ierr = PetscFree(a->a);CHKERRQ(ierr); 330 if (!a->singlemalloc) { 331 ierr = PetscFree(a->i);CHKERRQ(ierr); 332 ierr = PetscFree(a->j);CHKERRQ(ierr); 333 } 334 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 335 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 336 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 337 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 338 ierr = PetscFree(a);CHKERRQ(ierr); 339 340 341 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 342 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 343 344 /* 345 This is horrible, horrible code. We need to keep the 346 A pointers for the bops and ops but copy everything 347 else from C. 348 */ 349 Abops = A->bops; 350 Aops = A->ops; 351 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 352 A->bops = Abops; 353 A->ops = Aops; 354 A->qlist = 0; 355 356 PetscHeaderDestroy(C); 357 } 358 PetscFunctionReturn(0); 359 } 360 361 #undef __FUNC__ 362 #define __FUNC__ "MatView_SeqBAIJ_Binary" 363 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 364 { 365 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 366 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 367 Scalar *aa; 368 FILE *file; 369 370 PetscFunctionBegin; 371 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 372 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 373 col_lens[0] = MAT_COOKIE; 374 375 col_lens[1] = a->m; 376 col_lens[2] = a->n; 377 col_lens[3] = a->nz*bs2; 378 379 /* store lengths of each row and write (including header) to file */ 380 count = 0; 381 for ( i=0; i<a->mbs; i++ ) { 382 for ( j=0; j<bs; j++ ) { 383 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 384 } 385 } 386 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr); 387 ierr = PetscFree(col_lens);CHKERRQ(ierr); 388 389 /* store column indices (zero start index) */ 390 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) );CHKPTRQ(jj); 391 count = 0; 392 for ( i=0; i<a->mbs; i++ ) { 393 for ( j=0; j<bs; j++ ) { 394 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 395 for ( l=0; l<bs; l++ ) { 396 jj[count++] = bs*a->j[k] + l; 397 } 398 } 399 } 400 } 401 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 402 ierr = PetscFree(jj);CHKERRQ(ierr); 403 404 /* store nonzero values */ 405 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa); 406 count = 0; 407 for ( i=0; i<a->mbs; i++ ) { 408 for ( j=0; j<bs; j++ ) { 409 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 410 for ( l=0; l<bs; l++ ) { 411 aa[count++] = a->a[bs2*k + l*bs + j]; 412 } 413 } 414 } 415 } 416 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 417 ierr = PetscFree(aa);CHKERRQ(ierr); 418 419 ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 420 if (file) { 421 fprintf(file,"-matload_block_size %d\n",a->bs); 422 } 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNC__ 427 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 428 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 429 { 430 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 431 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 432 char *outputname; 433 434 PetscFunctionBegin; 435 ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr); 436 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 437 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 438 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 439 } else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 440 SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported"); 441 } else if (format == VIEWER_FORMAT_ASCII_COMMON) { 442 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 443 for ( i=0; i<a->mbs; i++ ) { 444 for ( j=0; j<bs; j++ ) { 445 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 446 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 447 for ( l=0; l<bs; l++ ) { 448 #if defined(PETSC_USE_COMPLEX) 449 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 450 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 451 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 452 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 453 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 454 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 455 } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 456 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 457 } 458 #else 459 if (a->a[bs2*k + l*bs + j] != 0.0) { 460 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 461 } 462 #endif 463 } 464 } 465 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 466 } 467 } 468 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 469 } else { 470 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 471 for ( i=0; i<a->mbs; i++ ) { 472 for ( j=0; j<bs; j++ ) { 473 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 474 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 475 for ( l=0; l<bs; l++ ) { 476 #if defined(PETSC_USE_COMPLEX) 477 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) { 478 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 479 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 480 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) { 481 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 482 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 483 } else { 484 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 485 } 486 #else 487 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 488 #endif 489 } 490 } 491 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 492 } 493 } 494 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 495 } 496 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNC__ 501 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom" 502 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa) 503 { 504 Mat A = (Mat) Aa; 505 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 506 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 507 double xl,yl,xr,yr,x_l,x_r,y_l,y_r; 508 MatScalar *aa; 509 MPI_Comm comm; 510 Viewer viewer; 511 512 PetscFunctionBegin; 513 /* 514 This is nasty. If this is called from an originally parallel matrix 515 then all processes call this, but only the first has the matrix so the 516 rest should return immediately. 517 */ 518 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 519 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 520 if (rank) PetscFunctionReturn(0); 521 522 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr); 523 524 ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 525 526 /* loop over matrix elements drawing boxes */ 527 color = DRAW_BLUE; 528 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 529 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 530 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 531 x_l = a->j[j]*bs; x_r = x_l + 1.0; 532 aa = a->a + j*bs2; 533 for ( k=0; k<bs; k++ ) { 534 for ( l=0; l<bs; l++ ) { 535 if (PetscReal(*aa++) >= 0.) continue; 536 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 537 } 538 } 539 } 540 } 541 color = DRAW_CYAN; 542 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 543 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 544 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 545 x_l = a->j[j]*bs; x_r = x_l + 1.0; 546 aa = a->a + j*bs2; 547 for ( k=0; k<bs; k++ ) { 548 for ( l=0; l<bs; l++ ) { 549 if (PetscReal(*aa++) != 0.) continue; 550 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 551 } 552 } 553 } 554 } 555 556 color = DRAW_RED; 557 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 558 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 559 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 560 x_l = a->j[j]*bs; x_r = x_l + 1.0; 561 aa = a->a + j*bs2; 562 for ( k=0; k<bs; k++ ) { 563 for ( l=0; l<bs; l++ ) { 564 if (PetscReal(*aa++) <= 0.) continue; 565 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 566 } 567 } 568 } 569 } 570 PetscFunctionReturn(0); 571 } 572 573 #undef __FUNC__ 574 #define __FUNC__ "MatView_SeqBAIJ_Draw" 575 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 576 { 577 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 578 int ierr; 579 double xl,yl,xr,yr,w,h; 580 Draw draw; 581 PetscTruth isnull; 582 583 PetscFunctionBegin; 584 585 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 586 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 587 588 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 589 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 590 xr += w; yr += h; xl = -w; yl = -h; 591 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 592 ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 593 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNC__ 598 #define __FUNC__ "MatView_SeqBAIJ" 599 int MatView_SeqBAIJ(Mat A,Viewer viewer) 600 { 601 int ierr; 602 PetscTruth issocket,isascii,isbinary,isdraw; 603 604 PetscFunctionBegin; 605 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 606 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 607 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 608 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 609 if (issocket) { 610 SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported"); 611 } else if (isascii){ 612 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 613 } else if (isbinary) { 614 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 615 } else if (isdraw) { 616 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 617 } else { 618 SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 619 } 620 PetscFunctionReturn(0); 621 } 622 623 624 #undef __FUNC__ 625 #define __FUNC__ "MatGetValues_SeqBAIJ" 626 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 627 { 628 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 629 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 630 int *ai = a->i, *ailen = a->ilen; 631 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 632 MatScalar *ap, *aa = a->a, zero = 0.0; 633 634 PetscFunctionBegin; 635 for ( k=0; k<m; k++ ) { /* loop over rows */ 636 row = im[k]; brow = row/bs; 637 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 638 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 639 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 640 nrow = ailen[brow]; 641 for ( l=0; l<n; l++ ) { /* loop over columns */ 642 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 643 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 644 col = in[l] ; 645 bcol = col/bs; 646 cidx = col%bs; 647 ridx = row%bs; 648 high = nrow; 649 low = 0; /* assume unsorted */ 650 while (high-low > 5) { 651 t = (low+high)/2; 652 if (rp[t] > bcol) high = t; 653 else low = t; 654 } 655 for ( i=low; i<high; i++ ) { 656 if (rp[i] > bcol) break; 657 if (rp[i] == bcol) { 658 *v++ = ap[bs2*i+bs*cidx+ridx]; 659 goto finished; 660 } 661 } 662 *v++ = zero; 663 finished:; 664 } 665 } 666 PetscFunctionReturn(0); 667 } 668 669 670 #undef __FUNC__ 671 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 672 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 673 { 674 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 675 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 676 int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 677 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 678 Scalar *value = v; 679 MatScalar *ap,*aa=a->a,*bap; 680 681 PetscFunctionBegin; 682 if (roworiented) { 683 stepval = (n-1)*bs; 684 } else { 685 stepval = (m-1)*bs; 686 } 687 for ( k=0; k<m; k++ ) { /* loop over added rows */ 688 row = im[k]; 689 if (row < 0) continue; 690 #if defined(PETSC_USE_BOPT_g) 691 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 692 #endif 693 rp = aj + ai[row]; 694 ap = aa + bs2*ai[row]; 695 rmax = imax[row]; 696 nrow = ailen[row]; 697 low = 0; 698 for ( l=0; l<n; l++ ) { /* loop over added columns */ 699 if (in[l] < 0) continue; 700 #if defined(PETSC_USE_BOPT_g) 701 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 702 #endif 703 col = in[l]; 704 if (roworiented) { 705 value = v + k*(stepval+bs)*bs + l*bs; 706 } else { 707 value = v + l*(stepval+bs)*bs + k*bs; 708 } 709 if (!sorted) low = 0; high = nrow; 710 while (high-low > 7) { 711 t = (low+high)/2; 712 if (rp[t] > col) high = t; 713 else low = t; 714 } 715 for ( i=low; i<high; i++ ) { 716 if (rp[i] > col) break; 717 if (rp[i] == col) { 718 bap = ap + bs2*i; 719 if (roworiented) { 720 if (is == ADD_VALUES) { 721 for ( ii=0; ii<bs; ii++,value+=stepval ) { 722 for (jj=ii; jj<bs2; jj+=bs ) { 723 bap[jj] += *value++; 724 } 725 } 726 } else { 727 for ( ii=0; ii<bs; ii++,value+=stepval ) { 728 for (jj=ii; jj<bs2; jj+=bs ) { 729 bap[jj] = *value++; 730 } 731 } 732 } 733 } else { 734 if (is == ADD_VALUES) { 735 for ( ii=0; ii<bs; ii++,value+=stepval ) { 736 for (jj=0; jj<bs; jj++ ) { 737 *bap++ += *value++; 738 } 739 } 740 } else { 741 for ( ii=0; ii<bs; ii++,value+=stepval ) { 742 for (jj=0; jj<bs; jj++ ) { 743 *bap++ = *value++; 744 } 745 } 746 } 747 } 748 goto noinsert2; 749 } 750 } 751 if (nonew == 1) goto noinsert2; 752 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 753 if (nrow >= rmax) { 754 /* there is no extra room in row, therefore enlarge */ 755 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 756 MatScalar *new_a; 757 758 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 759 760 /* malloc new storage space */ 761 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 762 new_a = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a); 763 new_j = (int *) (new_a + bs2*new_nz); 764 new_i = new_j + new_nz; 765 766 /* copy over old data into new slots */ 767 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 768 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 769 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 770 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 771 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 772 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 773 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 774 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 775 /* free up old matrix storage */ 776 ierr = PetscFree(a->a);CHKERRQ(ierr); 777 if (!a->singlemalloc) { 778 ierr = PetscFree(a->i);CHKERRQ(ierr); 779 ierr = PetscFree(a->j);CHKERRQ(ierr); 780 } 781 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 782 a->singlemalloc = PETSC_TRUE; 783 784 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 785 rmax = imax[row] = imax[row] + CHUNKSIZE; 786 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 787 a->maxnz += bs2*CHUNKSIZE; 788 a->reallocs++; 789 a->nz++; 790 } 791 N = nrow++ - 1; 792 /* shift up all the later entries in this row */ 793 for ( ii=N; ii>=i; ii-- ) { 794 rp[ii+1] = rp[ii]; 795 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 796 } 797 if (N >= i) { 798 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 799 } 800 rp[i] = col; 801 bap = ap + bs2*i; 802 if (roworiented) { 803 for ( ii=0; ii<bs; ii++,value+=stepval) { 804 for (jj=ii; jj<bs2; jj+=bs ) { 805 bap[jj] = *value++; 806 } 807 } 808 } else { 809 for ( ii=0; ii<bs; ii++,value+=stepval ) { 810 for (jj=0; jj<bs; jj++ ) { 811 *bap++ = *value++; 812 } 813 } 814 } 815 noinsert2:; 816 low = i; 817 } 818 ailen[row] = nrow; 819 } 820 PetscFunctionReturn(0); 821 } 822 823 824 #undef __FUNC__ 825 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 826 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 827 { 828 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 829 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 830 int m = a->m,*ip, N, *ailen = a->ilen; 831 int mbs = a->mbs, bs2 = a->bs2,rmax = 0,ierr; 832 MatScalar *aa = a->a, *ap; 833 834 PetscFunctionBegin; 835 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 836 837 if (m) rmax = ailen[0]; 838 for ( i=1; i<mbs; i++ ) { 839 /* move each row back by the amount of empty slots (fshift) before it*/ 840 fshift += imax[i-1] - ailen[i-1]; 841 rmax = PetscMax(rmax,ailen[i]); 842 if (fshift) { 843 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 844 N = ailen[i]; 845 for ( j=0; j<N; j++ ) { 846 ip[j-fshift] = ip[j]; 847 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 848 } 849 } 850 ai[i] = ai[i-1] + ailen[i-1]; 851 } 852 if (mbs) { 853 fshift += imax[mbs-1] - ailen[mbs-1]; 854 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 855 } 856 /* reset ilen and imax for each row */ 857 for ( i=0; i<mbs; i++ ) { 858 ailen[i] = imax[i] = ai[i+1] - ai[i]; 859 } 860 a->nz = ai[mbs]; 861 862 /* diagonals may have moved, so kill the diagonal pointers */ 863 if (fshift && a->diag) { 864 ierr = PetscFree(a->diag);CHKERRQ(ierr); 865 PLogObjectMemory(A,-(m+1)*sizeof(int)); 866 a->diag = 0; 867 } 868 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 869 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 870 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 871 a->reallocs); 872 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 873 a->reallocs = 0; 874 A->info.nz_unneeded = (double)fshift*bs2; 875 876 PetscFunctionReturn(0); 877 } 878 879 880 881 /* 882 This function returns an array of flags which indicate the locations of contiguous 883 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 884 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 885 Assume: sizes should be long enough to hold all the values. 886 */ 887 #undef __FUNC__ 888 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks" 889 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 890 { 891 int i,j,k,row; 892 PetscTruth flg; 893 894 PetscFunctionBegin; 895 for ( i=0,j=0; i<n; j++ ) { 896 row = idx[i]; 897 if (row%bs!=0) { /* Not the begining of a block */ 898 sizes[j] = 1; 899 i++; 900 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 901 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 902 i++; 903 } else { /* Begining of the block, so check if the complete block exists */ 904 flg = PETSC_TRUE; 905 for ( k=1; k<bs; k++ ) { 906 if (row+k != idx[i+k]) { /* break in the block */ 907 flg = PETSC_FALSE; 908 break; 909 } 910 } 911 if (flg == PETSC_TRUE) { /* No break in the bs */ 912 sizes[j] = bs; 913 i+= bs; 914 } else { 915 sizes[j] = 1; 916 i++; 917 } 918 } 919 } 920 *bs_max = j; 921 PetscFunctionReturn(0); 922 } 923 924 #undef __FUNC__ 925 #define __FUNC__ "MatZeroRows_SeqBAIJ" 926 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 927 { 928 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 929 int ierr,i,j,k,count,is_n,*is_idx,*rows; 930 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 931 Scalar zero = 0.0; 932 MatScalar *aa; 933 934 PetscFunctionBegin; 935 /* Make a copy of the IS and sort it */ 936 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 937 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 938 939 /* allocate memory for rows,sizes */ 940 rows = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows); 941 sizes = rows + is_n; 942 943 /* initialize copy IS valurs to rows, and sort them */ 944 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 945 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 946 if (baij->keepzeroedrows) { 947 for ( i=0; i<is_n; i++ ) { sizes[i] = 1; } 948 bs_max = is_n; 949 } else { 950 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 951 } 952 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 953 954 for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) { 955 row = rows[j]; 956 if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row); 957 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 958 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 959 if (sizes[i] == bs && !baij->keepzeroedrows) { 960 if (diag) { 961 if (baij->ilen[row/bs] > 0) { 962 baij->ilen[row/bs] = 1; 963 baij->j[baij->i[row/bs]] = row/bs; 964 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 965 } 966 /* Now insert all the diagoanl values for this bs */ 967 for ( k=0; k<bs; k++ ) { 968 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 969 } 970 } else { /* (!diag) */ 971 baij->ilen[row/bs] = 0; 972 } /* end (!diag) */ 973 } else { /* (sizes[i] != bs) */ 974 #if defined (PETSC_USE_DEBUG) 975 if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1"); 976 #endif 977 for ( k=0; k<count; k++ ) { 978 aa[0] = zero; 979 aa+=bs; 980 } 981 if (diag) { 982 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 983 } 984 } 985 } 986 987 ierr = PetscFree(rows);CHKERRQ(ierr); 988 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNC__ 993 #define __FUNC__ "MatSetValues_SeqBAIJ" 994 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 995 { 996 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 997 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 998 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 999 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1000 int ridx,cidx,bs2=a->bs2,ierr; 1001 MatScalar *ap,value,*aa=a->a,*bap; 1002 1003 PetscFunctionBegin; 1004 for ( k=0; k<m; k++ ) { /* loop over added rows */ 1005 row = im[k]; brow = row/bs; 1006 if (row < 0) continue; 1007 #if defined(PETSC_USE_BOPT_g) 1008 if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m); 1009 #endif 1010 rp = aj + ai[brow]; 1011 ap = aa + bs2*ai[brow]; 1012 rmax = imax[brow]; 1013 nrow = ailen[brow]; 1014 low = 0; 1015 for ( l=0; l<n; l++ ) { /* loop over added columns */ 1016 if (in[l] < 0) continue; 1017 #if defined(PETSC_USE_BOPT_g) 1018 if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n); 1019 #endif 1020 col = in[l]; bcol = col/bs; 1021 ridx = row % bs; cidx = col % bs; 1022 if (roworiented) { 1023 value = v[l + k*n]; 1024 } else { 1025 value = v[k + l*m]; 1026 } 1027 if (!sorted) low = 0; high = nrow; 1028 while (high-low > 7) { 1029 t = (low+high)/2; 1030 if (rp[t] > bcol) high = t; 1031 else low = t; 1032 } 1033 for ( i=low; i<high; i++ ) { 1034 if (rp[i] > bcol) break; 1035 if (rp[i] == bcol) { 1036 bap = ap + bs2*i + bs*cidx + ridx; 1037 if (is == ADD_VALUES) *bap += value; 1038 else *bap = value; 1039 goto noinsert1; 1040 } 1041 } 1042 if (nonew == 1) goto noinsert1; 1043 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1044 if (nrow >= rmax) { 1045 /* there is no extra room in row, therefore enlarge */ 1046 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1047 MatScalar *new_a; 1048 1049 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1050 1051 /* Malloc new storage space */ 1052 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1053 new_a = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a); 1054 new_j = (int *) (new_a + bs2*new_nz); 1055 new_i = new_j + new_nz; 1056 1057 /* copy over old data into new slots */ 1058 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 1059 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1060 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1061 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1062 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1063 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1064 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1065 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1066 /* free up old matrix storage */ 1067 ierr = PetscFree(a->a);CHKERRQ(ierr); 1068 if (!a->singlemalloc) { 1069 ierr = PetscFree(a->i);CHKERRQ(ierr); 1070 ierr = PetscFree(a->j);CHKERRQ(ierr); 1071 } 1072 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1073 a->singlemalloc = PETSC_TRUE; 1074 1075 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1076 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1077 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1078 a->maxnz += bs2*CHUNKSIZE; 1079 a->reallocs++; 1080 a->nz++; 1081 } 1082 N = nrow++ - 1; 1083 /* shift up all the later entries in this row */ 1084 for ( ii=N; ii>=i; ii-- ) { 1085 rp[ii+1] = rp[ii]; 1086 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1087 } 1088 if (N>=i) { 1089 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1090 } 1091 rp[i] = bcol; 1092 ap[bs2*i + bs*cidx + ridx] = value; 1093 noinsert1:; 1094 low = i; 1095 } 1096 ailen[brow] = nrow; 1097 } 1098 PetscFunctionReturn(0); 1099 } 1100 1101 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1102 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1103 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1104 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1105 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1106 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec); 1107 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1108 extern int MatScale_SeqBAIJ(Scalar*,Mat); 1109 extern int MatNorm_SeqBAIJ(Mat,NormType,double *); 1110 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*); 1111 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1112 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1113 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1114 extern int MatZeroEntries_SeqBAIJ(Mat); 1115 1116 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1117 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1118 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1119 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1120 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1121 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1122 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 1123 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1124 extern int MatSolveTrans_SeqBAIJ_7(Mat,Vec,Vec); 1125 extern int MatSolveTrans_SeqBAIJ_6(Mat,Vec,Vec); 1126 extern int MatSolveTrans_SeqBAIJ_5(Mat,Vec,Vec); 1127 extern int MatSolveTrans_SeqBAIJ_4(Mat,Vec,Vec); 1128 extern int MatSolveTrans_SeqBAIJ_3(Mat,Vec,Vec); 1129 extern int MatSolveTrans_SeqBAIJ_2(Mat,Vec,Vec); 1130 extern int MatSolveTrans_SeqBAIJ_1(Mat,Vec,Vec); 1131 1132 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1133 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1134 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1135 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1136 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1137 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1138 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*); 1139 1140 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1141 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1142 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1143 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1144 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1145 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec); 1146 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1147 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1148 1149 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1150 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1151 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1152 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1153 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1154 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 1155 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1156 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1157 1158 #undef __FUNC__ 1159 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1160 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1161 { 1162 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1163 Mat outA; 1164 int ierr; 1165 PetscTruth row_identity, col_identity; 1166 1167 PetscFunctionBegin; 1168 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU"); 1169 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1170 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1171 if (!row_identity || !col_identity) { 1172 SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU"); 1173 } 1174 1175 outA = inA; 1176 inA->factor = FACTOR_LU; 1177 1178 if (!a->diag) { 1179 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1180 } 1181 /* 1182 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1183 for ILU(0) factorization with natural ordering 1184 */ 1185 switch (a->bs) { 1186 case 1: 1187 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_2_NaturalOrdering; 1188 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1189 case 2: 1190 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1191 inA->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1192 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_2_NaturalOrdering; 1193 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1194 break; 1195 case 3: 1196 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1197 inA->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1198 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_3_NaturalOrdering; 1199 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1200 break; 1201 case 4: 1202 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1203 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1204 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_4_NaturalOrdering; 1205 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1206 break; 1207 case 5: 1208 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1209 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1210 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_5_NaturalOrdering; 1211 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1212 break; 1213 case 6: 1214 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1215 inA->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1216 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_6_NaturalOrdering; 1217 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1218 break; 1219 case 7: 1220 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1221 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_7_NaturalOrdering; 1222 inA->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1223 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1224 break; 1225 default: 1226 a->row = row; 1227 a->col = col; 1228 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1229 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1230 1231 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1232 ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr); 1233 PLogObjectParent(inA,a->icol); 1234 1235 if (!a->solve_work) { 1236 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1237 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1238 } 1239 } 1240 1241 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1242 1243 PetscFunctionReturn(0); 1244 } 1245 #undef __FUNC__ 1246 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1247 int MatPrintHelp_SeqBAIJ(Mat A) 1248 { 1249 static PetscTruth called = PETSC_FALSE; 1250 MPI_Comm comm = A->comm; 1251 int ierr; 1252 1253 PetscFunctionBegin; 1254 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1255 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1256 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1257 PetscFunctionReturn(0); 1258 } 1259 1260 EXTERN_C_BEGIN 1261 #undef __FUNC__ 1262 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1263 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1264 { 1265 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1266 int i,nz,n; 1267 1268 PetscFunctionBegin; 1269 nz = baij->maxnz; 1270 n = baij->n; 1271 for (i=0; i<nz; i++) { 1272 baij->j[i] = indices[i]; 1273 } 1274 baij->nz = nz; 1275 for ( i=0; i<n; i++ ) { 1276 baij->ilen[i] = baij->imax[i]; 1277 } 1278 1279 PetscFunctionReturn(0); 1280 } 1281 EXTERN_C_END 1282 1283 #undef __FUNC__ 1284 #define __FUNC__ "MatSeqBAIJSetColumnIndices" 1285 /*@ 1286 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1287 in the matrix. 1288 1289 Input Parameters: 1290 + mat - the SeqBAIJ matrix 1291 - indices - the column indices 1292 1293 Level: advanced 1294 1295 Notes: 1296 This can be called if you have precomputed the nonzero structure of the 1297 matrix and want to provide it to the matrix object to improve the performance 1298 of the MatSetValues() operation. 1299 1300 You MUST have set the correct numbers of nonzeros per row in the call to 1301 MatCreateSeqBAIJ(). 1302 1303 MUST be called before any calls to MatSetValues(); 1304 1305 @*/ 1306 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1307 { 1308 int ierr,(*f)(Mat,int *); 1309 1310 PetscFunctionBegin; 1311 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1312 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr); 1313 if (f) { 1314 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1315 } else { 1316 SETERRQ(1,1,"Wrong type of matrix to set column indices"); 1317 } 1318 PetscFunctionReturn(0); 1319 } 1320 1321 /* -------------------------------------------------------------------*/ 1322 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1323 MatGetRow_SeqBAIJ, 1324 MatRestoreRow_SeqBAIJ, 1325 MatMult_SeqBAIJ_N, 1326 MatMultAdd_SeqBAIJ_N, 1327 MatMultTrans_SeqBAIJ, 1328 MatMultTransAdd_SeqBAIJ, 1329 MatSolve_SeqBAIJ_N, 1330 0, 1331 0, 1332 0, 1333 MatLUFactor_SeqBAIJ, 1334 0, 1335 0, 1336 MatTranspose_SeqBAIJ, 1337 MatGetInfo_SeqBAIJ, 1338 MatEqual_SeqBAIJ, 1339 MatGetDiagonal_SeqBAIJ, 1340 MatDiagonalScale_SeqBAIJ, 1341 MatNorm_SeqBAIJ, 1342 0, 1343 MatAssemblyEnd_SeqBAIJ, 1344 0, 1345 MatSetOption_SeqBAIJ, 1346 MatZeroEntries_SeqBAIJ, 1347 MatZeroRows_SeqBAIJ, 1348 MatLUFactorSymbolic_SeqBAIJ, 1349 MatLUFactorNumeric_SeqBAIJ_N, 1350 0, 1351 0, 1352 MatGetSize_SeqBAIJ, 1353 MatGetSize_SeqBAIJ, 1354 MatGetOwnershipRange_SeqBAIJ, 1355 MatILUFactorSymbolic_SeqBAIJ, 1356 0, 1357 0, 1358 0, 1359 MatDuplicate_SeqBAIJ, 1360 0, 1361 0, 1362 MatILUFactor_SeqBAIJ, 1363 0, 1364 0, 1365 MatGetSubMatrices_SeqBAIJ, 1366 MatIncreaseOverlap_SeqBAIJ, 1367 MatGetValues_SeqBAIJ, 1368 0, 1369 MatPrintHelp_SeqBAIJ, 1370 MatScale_SeqBAIJ, 1371 0, 1372 0, 1373 0, 1374 MatGetBlockSize_SeqBAIJ, 1375 MatGetRowIJ_SeqBAIJ, 1376 MatRestoreRowIJ_SeqBAIJ, 1377 0, 1378 0, 1379 0, 1380 0, 1381 0, 1382 0, 1383 MatSetValuesBlocked_SeqBAIJ, 1384 MatGetSubMatrix_SeqBAIJ, 1385 0, 1386 0, 1387 MatGetMaps_Petsc}; 1388 1389 EXTERN_C_BEGIN 1390 #undef __FUNC__ 1391 #define __FUNC__ "MatStoreValues_SeqBAIJ" 1392 int MatStoreValues_SeqBAIJ(Mat mat) 1393 { 1394 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1395 int nz = aij->i[aij->m]*aij->bs*aij->bs2; 1396 int ierr; 1397 1398 PetscFunctionBegin; 1399 if (aij->nonew != 1) { 1400 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1401 } 1402 1403 /* allocate space for values if not already there */ 1404 if (!aij->saved_values) { 1405 aij->saved_values = (Scalar *) PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values); 1406 } 1407 1408 /* copy values over */ 1409 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1410 PetscFunctionReturn(0); 1411 } 1412 EXTERN_C_END 1413 1414 EXTERN_C_BEGIN 1415 #undef __FUNC__ 1416 #define __FUNC__ "MatRetrieveValues_SeqBAIJ" 1417 int MatRetrieveValues_SeqBAIJ(Mat mat) 1418 { 1419 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1420 int nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr; 1421 1422 PetscFunctionBegin; 1423 if (aij->nonew != 1) { 1424 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1425 } 1426 if (!aij->saved_values) { 1427 SETERRQ(1,1,"Must call MatStoreValues(A);first"); 1428 } 1429 1430 /* copy values over */ 1431 ierr = PetscMemcpy(aij->a, aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1432 PetscFunctionReturn(0); 1433 } 1434 EXTERN_C_END 1435 1436 #undef __FUNC__ 1437 #define __FUNC__ "MatCreateSeqBAIJ" 1438 /*@C 1439 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1440 compressed row) format. For good matrix assembly performance the 1441 user should preallocate the matrix storage by setting the parameter nz 1442 (or the array nnz). By setting these parameters accurately, performance 1443 during matrix assembly can be increased by more than a factor of 50. 1444 1445 Collective on MPI_Comm 1446 1447 Input Parameters: 1448 + comm - MPI communicator, set to PETSC_COMM_SELF 1449 . bs - size of block 1450 . m - number of rows 1451 . n - number of columns 1452 . nz - number of block nonzeros per block row (same for all rows) 1453 - nnz - array containing the number of block nonzeros in the various block rows 1454 (possibly different for each block row) or PETSC_NULL 1455 1456 Output Parameter: 1457 . A - the matrix 1458 1459 Options Database Keys: 1460 . -mat_no_unroll - uses code that does not unroll the loops in the 1461 block calculations (much slower) 1462 . -mat_block_size - size of the blocks to use 1463 1464 Level: intermediate 1465 1466 Notes: 1467 The block AIJ format is fully compatible with standard Fortran 77 1468 storage. That is, the stored row and column indices can begin at 1469 either one (as in Fortran) or zero. See the users' manual for details. 1470 1471 Specify the preallocated storage with either nz or nnz (not both). 1472 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1473 allocation. For additional details, see the users manual chapter on 1474 matrices. 1475 1476 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1477 @*/ 1478 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1479 { 1480 Mat B; 1481 Mat_SeqBAIJ *b; 1482 int i,len,ierr,mbs,nbs,bs2,size; 1483 PetscTruth flg; 1484 1485 PetscFunctionBegin; 1486 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1487 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1"); 1488 1489 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1490 mbs = m/bs; 1491 nbs = n/bs; 1492 bs2 = bs*bs; 1493 1494 if (mbs*bs!=m || nbs*bs!=n) { 1495 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize"); 1496 } 1497 1498 if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz); 1499 if (nnz) { 1500 for (i=0; i<mbs; i++) { 1501 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1502 } 1503 } 1504 1505 *A = 0; 1506 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView); 1507 PLogObjectCreate(B); 1508 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b); 1509 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1510 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1511 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1512 if (!flg) { 1513 switch (bs) { 1514 case 1: 1515 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1516 B->ops->solve = MatSolve_SeqBAIJ_1; 1517 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_1; 1518 B->ops->mult = MatMult_SeqBAIJ_1; 1519 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1520 break; 1521 case 2: 1522 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1523 B->ops->solve = MatSolve_SeqBAIJ_2; 1524 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_2; 1525 B->ops->mult = MatMult_SeqBAIJ_2; 1526 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1527 break; 1528 case 3: 1529 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1530 B->ops->solve = MatSolve_SeqBAIJ_3; 1531 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_3; 1532 B->ops->mult = MatMult_SeqBAIJ_3; 1533 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1534 break; 1535 case 4: 1536 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1537 B->ops->solve = MatSolve_SeqBAIJ_4; 1538 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_4; 1539 B->ops->mult = MatMult_SeqBAIJ_4; 1540 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1541 break; 1542 case 5: 1543 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1544 B->ops->solve = MatSolve_SeqBAIJ_5; 1545 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_5; 1546 B->ops->mult = MatMult_SeqBAIJ_5; 1547 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1548 break; 1549 case 6: 1550 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1551 B->ops->solve = MatSolve_SeqBAIJ_6; 1552 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_6; 1553 B->ops->mult = MatMult_SeqBAIJ_6; 1554 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1555 break; 1556 case 7: 1557 B->ops->mult = MatMult_SeqBAIJ_7; 1558 B->ops->solve = MatSolve_SeqBAIJ_7; 1559 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_7; 1560 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1561 break; 1562 } 1563 } 1564 B->ops->destroy = MatDestroy_SeqBAIJ; 1565 B->ops->view = MatView_SeqBAIJ; 1566 B->factor = 0; 1567 B->lupivotthreshold = 1.0; 1568 B->mapping = 0; 1569 b->row = 0; 1570 b->col = 0; 1571 b->icol = 0; 1572 b->reallocs = 0; 1573 b->saved_values = 0; 1574 1575 b->m = m; B->m = m; B->M = m; 1576 b->n = n; B->n = n; B->N = n; 1577 1578 ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr); 1579 ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr); 1580 1581 b->mbs = mbs; 1582 b->nbs = nbs; 1583 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(b->imax); 1584 if (!nnz) { 1585 if (nz == PETSC_DEFAULT) nz = 5; 1586 else if (nz <= 0) nz = 1; 1587 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1588 nz = nz*mbs; 1589 } else { 1590 nz = 0; 1591 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1592 } 1593 1594 /* allocate the matrix space */ 1595 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int); 1596 b->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a); 1597 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1598 b->j = (int *) (b->a + nz*bs2); 1599 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1600 b->i = b->j + nz; 1601 b->singlemalloc = PETSC_TRUE; 1602 1603 b->i[0] = 0; 1604 for (i=1; i<mbs+1; i++) { 1605 b->i[i] = b->i[i-1] + b->imax[i-1]; 1606 } 1607 1608 /* b->ilen will count nonzeros in each block row so far. */ 1609 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1610 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1611 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1612 1613 b->bs = bs; 1614 b->bs2 = bs2; 1615 b->mbs = mbs; 1616 b->nz = 0; 1617 b->maxnz = nz*bs2; 1618 b->sorted = PETSC_FALSE; 1619 b->roworiented = PETSC_TRUE; 1620 b->nonew = 0; 1621 b->diag = 0; 1622 b->solve_work = 0; 1623 b->mult_work = 0; 1624 b->spptr = 0; 1625 B->info.nz_unneeded = (double)b->maxnz; 1626 b->keepzeroedrows = PETSC_FALSE; 1627 1628 *A = B; 1629 ierr = OptionsHasName(PETSC_NULL,"-help", &flg);CHKERRQ(ierr); 1630 if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); } 1631 1632 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1633 "MatStoreValues_SeqBAIJ", 1634 (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1635 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1636 "MatRetrieveValues_SeqBAIJ", 1637 (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1638 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1639 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1640 (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1641 PetscFunctionReturn(0); 1642 } 1643 1644 #undef __FUNC__ 1645 #define __FUNC__ "MatDuplicate_SeqBAIJ" 1646 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1647 { 1648 Mat C; 1649 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1650 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1651 1652 PetscFunctionBegin; 1653 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix"); 1654 1655 *B = 0; 1656 PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView); 1657 PLogObjectCreate(C); 1658 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c); 1659 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1660 C->ops->destroy = MatDestroy_SeqBAIJ; 1661 C->ops->view = MatView_SeqBAIJ; 1662 C->factor = A->factor; 1663 c->row = 0; 1664 c->col = 0; 1665 c->icol = 0; 1666 c->saved_values = 0; 1667 c->keepzeroedrows = a->keepzeroedrows; 1668 C->assembled = PETSC_TRUE; 1669 1670 c->m = C->m = a->m; 1671 c->n = C->n = a->n; 1672 C->M = a->m; 1673 C->N = a->n; 1674 1675 c->bs = a->bs; 1676 c->bs2 = a->bs2; 1677 c->mbs = a->mbs; 1678 c->nbs = a->nbs; 1679 1680 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax); 1681 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen); 1682 for ( i=0; i<mbs; i++ ) { 1683 c->imax[i] = a->imax[i]; 1684 c->ilen[i] = a->ilen[i]; 1685 } 1686 1687 /* allocate the matrix space */ 1688 c->singlemalloc = PETSC_TRUE; 1689 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1690 c->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(c->a); 1691 c->j = (int *) (c->a + nz*bs2); 1692 c->i = c->j + nz; 1693 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1694 if (mbs > 0) { 1695 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1696 if (cpvalues == MAT_COPY_VALUES) { 1697 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1698 } else { 1699 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1700 } 1701 } 1702 1703 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1704 c->sorted = a->sorted; 1705 c->roworiented = a->roworiented; 1706 c->nonew = a->nonew; 1707 1708 if (a->diag) { 1709 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(c->diag); 1710 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1711 for ( i=0; i<mbs; i++ ) { 1712 c->diag[i] = a->diag[i]; 1713 } 1714 } else c->diag = 0; 1715 c->nz = a->nz; 1716 c->maxnz = a->maxnz; 1717 c->solve_work = 0; 1718 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1719 c->mult_work = 0; 1720 *B = C; 1721 ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1722 PetscFunctionReturn(0); 1723 } 1724 1725 #undef __FUNC__ 1726 #define __FUNC__ "MatLoad_SeqBAIJ" 1727 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1728 { 1729 Mat_SeqBAIJ *a; 1730 Mat B; 1731 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1732 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1733 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1734 int *masked, nmask,tmp,bs2,ishift; 1735 Scalar *aa; 1736 MPI_Comm comm = ((PetscObject) viewer)->comm; 1737 1738 PetscFunctionBegin; 1739 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1740 bs2 = bs*bs; 1741 1742 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1743 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor"); 1744 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1745 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1746 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object"); 1747 M = header[1]; N = header[2]; nz = header[3]; 1748 1749 if (header[3] < 0) { 1750 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ"); 1751 } 1752 1753 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 1754 1755 /* 1756 This code adds extra rows to make sure the number of rows is 1757 divisible by the blocksize 1758 */ 1759 mbs = M/bs; 1760 extra_rows = bs - M + bs*(mbs); 1761 if (extra_rows == bs) extra_rows = 0; 1762 else mbs++; 1763 if (extra_rows) { 1764 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1765 } 1766 1767 /* read in row lengths */ 1768 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1769 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1770 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1771 1772 /* read in column indices */ 1773 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) );CHKPTRQ(jj); 1774 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1775 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1776 1777 /* loop over row lengths determining block row lengths */ 1778 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1779 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1780 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) );CHKPTRQ(mask); 1781 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1782 masked = mask + mbs; 1783 rowcount = 0; nzcount = 0; 1784 for ( i=0; i<mbs; i++ ) { 1785 nmask = 0; 1786 for ( j=0; j<bs; j++ ) { 1787 kmax = rowlengths[rowcount]; 1788 for ( k=0; k<kmax; k++ ) { 1789 tmp = jj[nzcount++]/bs; 1790 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1791 } 1792 rowcount++; 1793 } 1794 browlengths[i] += nmask; 1795 /* zero out the mask elements we set */ 1796 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1797 } 1798 1799 /* create our matrix */ 1800 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1801 B = *A; 1802 a = (Mat_SeqBAIJ *) B->data; 1803 1804 /* set matrix "i" values */ 1805 a->i[0] = 0; 1806 for ( i=1; i<= mbs; i++ ) { 1807 a->i[i] = a->i[i-1] + browlengths[i-1]; 1808 a->ilen[i-1] = browlengths[i-1]; 1809 } 1810 a->nz = 0; 1811 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1812 1813 /* read in nonzero values */ 1814 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1815 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1816 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1817 1818 /* set "a" and "j" values into matrix */ 1819 nzcount = 0; jcount = 0; 1820 for ( i=0; i<mbs; i++ ) { 1821 nzcountb = nzcount; 1822 nmask = 0; 1823 for ( j=0; j<bs; j++ ) { 1824 kmax = rowlengths[i*bs+j]; 1825 for ( k=0; k<kmax; k++ ) { 1826 tmp = jj[nzcount++]/bs; 1827 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1828 } 1829 rowcount++; 1830 } 1831 /* sort the masked values */ 1832 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1833 1834 /* set "j" values into matrix */ 1835 maskcount = 1; 1836 for ( j=0; j<nmask; j++ ) { 1837 a->j[jcount++] = masked[j]; 1838 mask[masked[j]] = maskcount++; 1839 } 1840 /* set "a" values into matrix */ 1841 ishift = bs2*a->i[i]; 1842 for ( j=0; j<bs; j++ ) { 1843 kmax = rowlengths[i*bs+j]; 1844 for ( k=0; k<kmax; k++ ) { 1845 tmp = jj[nzcountb]/bs ; 1846 block = mask[tmp] - 1; 1847 point = jj[nzcountb] - bs*tmp; 1848 idx = ishift + bs2*block + j + bs*point; 1849 a->a[idx] = aa[nzcountb++]; 1850 } 1851 } 1852 /* zero out the mask elements we set */ 1853 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1854 } 1855 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix"); 1856 1857 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1858 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1859 ierr = PetscFree(aa);CHKERRQ(ierr); 1860 ierr = PetscFree(jj);CHKERRQ(ierr); 1861 ierr = PetscFree(mask);CHKERRQ(ierr); 1862 1863 B->assembled = PETSC_TRUE; 1864 1865 ierr = MatView_Private(B);CHKERRQ(ierr); 1866 PetscFunctionReturn(0); 1867 } 1868 1869 1870 1871